@@ -2491,6 +2491,13 @@ def __init__(self):
2491
2491
'd' : (- 1.798e+308 , + 8.988e+307 )
2492
2492
}
2493
2493
2494
+ self .OLD_TYPE_MAPPING = \
2495
+ {
2496
+ 'i' : 252 ,
2497
+ 'f' : 254 ,
2498
+ 'b' : 251
2499
+ }
2500
+
2494
2501
2495
2502
class StataReader (StataParser ):
2496
2503
"""
@@ -2547,42 +2554,64 @@ def __init__(self, path_or_buf, encoding=None):
2547
2554
2548
2555
def _read_header (self ):
2549
2556
# header
2550
- format_version = struct .unpack ('b' , self .path_or_buf .read (1 ))[0 ]
2551
- if format_version not in [113 , 114 , 115 ]:
2552
- raise ValueError ("Version of given Stata file is not 113 (Stata 8/9), 114 (Stata 10/11) or 115 (Stata 12)" )
2557
+ self . format_version = struct .unpack ('b' , self .path_or_buf .read (1 ))[0 ]
2558
+ if self . format_version not in [104 , 105 , 108 , 113 , 114 , 115 ]:
2559
+ raise ValueError ("Version of given Stata file is not 104, 105, 108, 113 (Stata 8/9), 114 (Stata 10/11) or 115 (Stata 12)" )
2553
2560
self .byteorder = self .path_or_buf .read (1 ) == 0x1 and '>' or '<'
2554
2561
self .filetype = struct .unpack ('b' , self .path_or_buf .read (1 ))[0 ]
2555
2562
self .path_or_buf .read (1 ) # unused
2556
2563
2557
2564
self .nvar = struct .unpack (self .byteorder + 'H' , self .path_or_buf .read (2 ))[0 ]
2558
2565
self .nobs = struct .unpack (self .byteorder + 'I' , self .path_or_buf .read (4 ))[0 ]
2559
- self .data_label = self .path_or_buf .read (81 )
2560
- self .time_stamp = self .path_or_buf .read (18 )
2566
+ if self .format_version > 105 :
2567
+ self .data_label = self .path_or_buf .read (81 )
2568
+ else :
2569
+ self .data_label = self .path_or_buf .read (32 )
2570
+ if self .format_version > 104 :
2571
+ self .time_stamp = self .path_or_buf .read (18 )
2561
2572
2562
2573
# descriptors
2563
- typlist = [ord (self .path_or_buf .read (1 )) for i in range (self .nvar )]
2574
+ if self .format_version > 108 :
2575
+ typlist = [ord (self .path_or_buf .read (1 )) for i in range (self .nvar )]
2576
+ else :
2577
+ typlist = [self .OLD_TYPE_MAPPING [self .path_or_buf .read (1 ).decode (self .encoding )] for i in range (self .nvar )]
2564
2578
self .typlist = [self .TYPE_MAP [typ ] for typ in typlist ]
2565
2579
self .dtyplist = [self .DTYPE_MAP [typ ] for typ in typlist ]
2566
- self .varlist = [self ._null_terminate (self .path_or_buf .read (33 )) for i in range (self .nvar )]
2580
+ if self .format_version > 108 :
2581
+ self .varlist = [self ._null_terminate (self .path_or_buf .read (33 )) for i in range (self .nvar )]
2582
+ else :
2583
+ self .varlist = [self ._null_terminate (self .path_or_buf .read (9 )) for i in range (self .nvar )]
2567
2584
self .srtlist = struct .unpack (self .byteorder + ('h' * (self .nvar + 1 )), self .path_or_buf .read (2 * (self .nvar + 1 )))[:- 1 ]
2568
- if format_version <= 113 :
2585
+ if self .format_version > 113 :
2586
+ self .fmtlist = [self ._null_terminate (self .path_or_buf .read (49 )) for i in range (self .nvar )]
2587
+ elif self .format_version > 104 :
2569
2588
self .fmtlist = [self ._null_terminate (self .path_or_buf .read (12 )) for i in range (self .nvar )]
2570
2589
else :
2571
- self .fmtlist = [self ._null_terminate (self .path_or_buf .read (49 )) for i in range (self .nvar )]
2572
- self .lbllist = [self ._null_terminate (self .path_or_buf .read (33 )) for i in range (self .nvar )]
2573
- self .vlblist = [self ._null_terminate (self .path_or_buf .read (81 )) for i in range (self .nvar )]
2590
+ self .fmtlist = [self ._null_terminate (self .path_or_buf .read (7 )) for i in range (self .nvar )]
2591
+ if self .format_version > 108 :
2592
+ self .lbllist = [self ._null_terminate (self .path_or_buf .read (33 )) for i in range (self .nvar )]
2593
+ else :
2594
+ self .lbllist = [self ._null_terminate (self .path_or_buf .read (9 )) for i in range (self .nvar )]
2595
+ if self .format_version > 105 :
2596
+ self .vlblist = [self ._null_terminate (self .path_or_buf .read (81 )) for i in range (self .nvar )]
2597
+ else :
2598
+ self .vlblist = [self ._null_terminate (self .path_or_buf .read (32 )) for i in range (self .nvar )]
2574
2599
2575
- # ignore expansion fields
2600
+ # ignore expansion fields (Format 105 and later)
2576
2601
# When reading, read five bytes; the last four bytes now tell you the
2577
2602
# size of the next read, which you discard. You then continue like
2578
2603
# this until you read 5 bytes of zeros.
2579
2604
2580
- while True :
2581
- self .data_type = struct .unpack (self .byteorder + 'b' , self .path_or_buf .read (1 ))[0 ]
2582
- self .data_len = struct .unpack (self .byteorder + 'i' , self .path_or_buf .read (4 ))[0 ]
2583
- if self .data_type == 0 :
2584
- break
2585
- self .path_or_buf .read (self .data_len )
2605
+ if self .format_version > 104 :
2606
+ while True :
2607
+ data_type = struct .unpack (self .byteorder + 'b' , self .path_or_buf .read (1 ))[0 ]
2608
+ if self .format_version > 108 :
2609
+ data_len = struct .unpack (self .byteorder + 'i' , self .path_or_buf .read (4 ))[0 ]
2610
+ else :
2611
+ data_len = struct .unpack (self .byteorder + 'h' , self .path_or_buf .read (2 ))[0 ]
2612
+ if data_type == 0 :
2613
+ break
2614
+ self .path_or_buf .read (data_len )
2586
2615
2587
2616
# necessary data to continue parsing
2588
2617
self .data_location = self .path_or_buf .tell ()
@@ -2678,6 +2707,9 @@ def _read_value_labels(self):
2678
2707
2679
2708
self .value_label_dict = dict ()
2680
2709
2710
+ if self .format_version <= 108 :
2711
+ return # Value labels are not supported in version 108 and earlier.
2712
+
2681
2713
while True :
2682
2714
slength = self .path_or_buf .read (4 )
2683
2715
if not slength :
0 commit comments