Skip to content

Commit 6e62b3a

Browse files
bashtage (Kevin Sheppard)
authored and
Kevin Sheppard
committed
MAINT: Clarify supported Stata dta versions
Test against old Stata versions and remove text indicating support for versions which do not work reliably. Closes pandas-dev#26667.
1 parent 9929fca commit 6e62b3a

File tree

6 files changed

+16
-6
lines changed

6 files changed

+16
-6
lines changed

pandas/io/stata.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@
5151

5252
_version_error = (
5353
"Version of given Stata file is {version}. pandas supports importing "
54-
"versions 104, 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), "
55-
"114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16),"
56-
"and 119 (Stata 15/16, over 32,767 variables)."
54+
"versions 111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), "
55+
"115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16), and 119 "
56+
"(Stata 15/16, over 32,767 variables)."
5757
)
5858

5959
_statafile_processing_params1 = """\
@@ -1245,7 +1245,7 @@ def _get_time_stamp(self) -> str:
12451245
elif self.format_version == 117:
12461246
strlen = struct.unpack("b", self.path_or_buf.read(1))[0]
12471247
return self._decode(self.path_or_buf.read(strlen))
1248-
elif self.format_version > 104:
1248+
elif self.format_version >= 111:
12491249
return self._decode(self.path_or_buf.read(18))
12501250
else:
12511251
raise ValueError()
@@ -1264,7 +1264,7 @@ def _get_seek_variable_labels(self) -> int:
12641264

12651265
def _read_old_header(self, first_char: bytes) -> None:
12661266
self.format_version = struct.unpack("b", first_char)[0]
1267-
if self.format_version not in [104, 105, 108, 111, 113, 114, 115]:
1267+
if self.format_version not in [111, 113, 114, 115]:
12681268
raise ValueError(_version_error.format(version=self.format_version))
12691269
self._set_encoding()
12701270
self.byteorder = (
@@ -1328,7 +1328,7 @@ def _read_old_header(self, first_char: bytes) -> None:
13281328
# the size of the next read, which you discard. You then continue
13291329
# like this until you read 5 bytes of zeros.
13301330

1331-
if self.format_version > 104:
1331+
if self.format_version >= 111:
13321332
while True:
13331333
data_type = struct.unpack(
13341334
self.byteorder + "b", self.path_or_buf.read(1)
1.48 KB
Binary file not shown.
1.48 KB
Binary file not shown.
1.77 KB
Binary file not shown.
5.66 KB
Binary file not shown.

pandas/tests/io/test_stata.py

+10
Original file line numberDiff line numberDiff line change
@@ -1853,3 +1853,13 @@ def test_writer_118_exceptions(self):
18531853
with tm.ensure_clean() as path:
18541854
with pytest.raises(ValueError, match="You must use version 119"):
18551855
StataWriterUTF8(path, df, version=118)
1856+
1857+
1858+
@pytest.mark.parametrize("version", [111, 113, 114])
1859+
def test_backward_compat(version, datapath):
1860+
data_base = datapath("io", "data", "stata")
1861+
ref = os.path.join(data_base, f"stata-compat-118.dta")
1862+
old = os.path.join(data_base, f"stata-compat-{version}.dta")
1863+
expected = pd.read_stata(ref)
1864+
old_dta = pd.read_stata(old)
1865+
tm.assert_frame_equal(old_dta, expected, check_dtype=False)

0 commit comments

Comments
 (0)