Skip to content

Commit 751751c

Browse files
committed
Initial commit to fix issue pandas-dev#11526
1 parent 6c73e76 commit 751751c

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

pandas/io/stata.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434
from pandas.tslib import NaT, Timestamp
3535

3636
_version_error = ("Version of given Stata file is not 104, 105, 108, "
37-
"113 (Stata 8/9), 114 (Stata 10/11), 115 (Stata 12), "
38-
"117 (Stata 13), or 118 (Stata 14)")
37+
"111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), "
38+
"115 (Stata 12), 117 (Stata 13), or 118 (Stata 14)")
3939

4040
_statafile_processing_params1 = """\
4141
convert_dates : boolean, defaults to True
@@ -1183,7 +1183,7 @@ def _get_seek_variable_labels(self):
11831183

11841184
def _read_old_header(self, first_char):
11851185
self.format_version = struct.unpack('b', first_char)[0]
1186-
if self.format_version not in [104, 105, 108, 113, 114, 115]:
1186+
if self.format_version not in [104, 105, 108, 111, 113, 114, 115]:
11871187
raise ValueError(_version_error)
11881188
self.byteorder = struct.unpack('b', self.path_or_buf.read(1))[
11891189
0] == 0x1 and '>' or '<'

pandas/io/tests/test_stata.py

+15
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ def setUp(self):
8282
self.dta22_118 = os.path.join(self.dirpath, 'stata14_118.dta')
8383
self.dta23 = os.path.join(self.dirpath, 'stata15.dta')
8484

85+
self.dta24_111 = os.path.join(self.dirpath, 'stata7_111.dta')
86+
8587
def read_dta(self, file):
8688
# Legacy default reader configuration
8789
return read_stata(file, convert_dates=True)
@@ -1219,6 +1221,19 @@ def test_repeated_column_labels(self):
12191221
read_stata(self.dta23, convert_categoricals=True)
12201222
tm.assertTrue('wolof' in cm.exception)
12211223

1224+
def test_stata_111(self):
1225+
# 111 is an old version but still used by current versions of
1226+
# SAS when exporting to Stata format. We do not know of any
1227+
# on-line documentation for this version.
1228+
df = read_stata(self.dta24_111)
1229+
original = pd.DataFrame({'y': [1,1,1,1,1,0,0,np.NaN,0,0],
1230+
'x': [1,2,1,3,np.NaN,4,3,5,1,6],
1231+
'w': [2,np.NaN,5,2,4,4,3,1,2,3],
1232+
'z': ['a','b','c','d','e','','g','h','i','j']})
1233+
original = original[['y', 'x', 'w', 'z']]
1234+
tm.assert_frame_equal(original, df)
1235+
1236+
12221237
if __name__ == '__main__':
12231238
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
12241239
exit=False)

0 commit comments

Comments
 (0)