diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e56014ed866ca..efbdab206222c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -226,7 +226,8 @@ Other enhancements - :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`) - The ``ExtensionArray`` class has now an :meth:`~pandas.arrays.ExtensionArray.equals` method, similarly to :meth:`Series.equals` (:issue:`27081`). -- +- The minimum supported dta version has increased to 105 in :meth:`~pandas.io.stata.read_stata` and :class:`~pandas.io.stata.StataReader` (:issue:`26667`). + .. --------------------------------------------------------------------------- diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 3ce1680c109f9..789e08d0652c9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -51,7 +51,7 @@ _version_error = ( "Version of given Stata file is {version}. pandas supports importing " - "versions 104, 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), " + "versions 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), " "114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16)," "and 119 (Stata 15/16, over 32,767 variables)." ) @@ -888,8 +888,8 @@ def __init__(self): 98: 251, # byte 105: 252, # int 108: 253, # long - 102: 254 # float - # don't know old code for double + 102: 254, # float + 100: 255, # double } # These missing values are the generic '.' 
in Stata, and are used diff --git a/pandas/tests/io/data/stata/stata-compat-105.dta b/pandas/tests/io/data/stata/stata-compat-105.dta new file mode 100644 index 0000000000000..b7092a7947a43 Binary files /dev/null and b/pandas/tests/io/data/stata/stata-compat-105.dta differ diff --git a/pandas/tests/io/data/stata/stata-compat-108.dta b/pandas/tests/io/data/stata/stata-compat-108.dta new file mode 100644 index 0000000000000..e2e6b150e5329 Binary files /dev/null and b/pandas/tests/io/data/stata/stata-compat-108.dta differ diff --git a/pandas/tests/io/data/stata/stata-compat-111.dta b/pandas/tests/io/data/stata/stata-compat-111.dta new file mode 100644 index 0000000000000..30e3561d126d4 Binary files /dev/null and b/pandas/tests/io/data/stata/stata-compat-111.dta differ diff --git a/pandas/tests/io/data/stata/stata-compat-113.dta b/pandas/tests/io/data/stata/stata-compat-113.dta new file mode 100644 index 0000000000000..7d5ba50627e1a Binary files /dev/null and b/pandas/tests/io/data/stata/stata-compat-113.dta differ diff --git a/pandas/tests/io/data/stata/stata-compat-114.dta b/pandas/tests/io/data/stata/stata-compat-114.dta new file mode 100644 index 0000000000000..26b801584a2fa Binary files /dev/null and b/pandas/tests/io/data/stata/stata-compat-114.dta differ diff --git a/pandas/tests/io/data/stata/stata-compat-118.dta b/pandas/tests/io/data/stata/stata-compat-118.dta new file mode 100644 index 0000000000000..cb4fd9b80cf47 Binary files /dev/null and b/pandas/tests/io/data/stata/stata-compat-118.dta differ diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 6839e3ed0bbea..783e06c9b7f2e 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1853,3 +1853,13 @@ def test_writer_118_exceptions(self): with tm.ensure_clean() as path: with pytest.raises(ValueError, match="You must use version 119"): StataWriterUTF8(path, df, version=118) + + +@pytest.mark.parametrize("version", [105, 108, 111, 113, 114]) +def 
test_backward_compat(version, datapath): + data_base = datapath("io", "data", "stata") + ref = os.path.join(data_base, f"stata-compat-118.dta") + old = os.path.join(data_base, f"stata-compat-{version}.dta") + expected = pd.read_stata(ref) + old_dta = pd.read_stata(old) + tm.assert_frame_equal(old_dta, expected, check_dtype=False)