From 665621c9cd18397d0e534a64202fa297e780689d Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 11 May 2020 16:05:12 +0100 Subject: [PATCH] MAINT: Clarify supported Stata dta versions Test against old Stata versions and remove text indicating support for versions which do not work reliably closes #26667 --- doc/source/whatsnew/v1.1.0.rst | 3 ++- pandas/io/stata.py | 6 +++--- pandas/tests/io/data/stata/stata-compat-105.dta | Bin 0 -> 771 bytes pandas/tests/io/data/stata/stata-compat-108.dta | Bin 0 -> 1128 bytes pandas/tests/io/data/stata/stata-compat-111.dta | Bin 0 -> 1514 bytes pandas/tests/io/data/stata/stata-compat-113.dta | Bin 0 -> 1514 bytes pandas/tests/io/data/stata/stata-compat-114.dta | Bin 0 -> 1810 bytes pandas/tests/io/data/stata/stata-compat-118.dta | Bin 0 -> 5798 bytes pandas/tests/io/test_stata.py | 10 ++++++++++ 9 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/io/data/stata/stata-compat-105.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-108.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-111.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-113.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-114.dta create mode 100644 pandas/tests/io/data/stata/stata-compat-118.dta diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e56014ed866ca..efbdab206222c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -226,7 +226,8 @@ Other enhancements - :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`) - The ``ExtensionArray`` class has now an :meth:`~pandas.arrays.ExtensionArray.equals` method, similarly to :meth:`Series.equals` (:issue:`27081`). 
-- +- The minimum supported dta version has increased to 105 in :meth:`~pandas.io.stata.read_stata` and :class:`~pandas.io.stata.StataReader` (:issue:`26667`). + .. --------------------------------------------------------------------------- diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 3ce1680c109f9..789e08d0652c9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -51,7 +51,7 @@ _version_error = ( "Version of given Stata file is {version}. pandas supports importing " - "versions 104, 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), " + "versions 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), " "114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16)," "and 119 (Stata 15/16, over 32,767 variables)." ) @@ -888,8 +888,8 @@ def __init__(self): 98: 251, # byte 105: 252, # int 108: 253, # long - 102: 254 # float - # don't know old code for double + 102: 254, # float + 100: 255, # double } # These missing values are the generic '.' in Stata, and are used diff --git a/pandas/tests/io/data/stata/stata-compat-105.dta b/pandas/tests/io/data/stata/stata-compat-105.dta new file mode 100644 index 0000000000000000000000000000000000000000..b7092a7947a4318ce94310103e6bcdf72a4e1874 GIT binary patch literal 771 zcmc~~Vr1Z8U}hi(Bqe3$q^0C^X6B`&R)7r1w166uX=nyvWf~hn7->)$h7>rX1j;Hl zG=Pbr)2bGF2I){ioNPlQxFS_cIFmsYSvK{2{LqkM`(fbswT-~a#D z|37=?%-&fuL13r7kR<~{Vp4KSYFc_mW)>6FjHE!OY0W!pP9T0F>Ql r30;3@? z8UlkO1Q`GS|NZ}e{r|IP&g`8v69jhJ3t2KSBqk-Nq^6~3WM(lz9eVbR&pwEXllH0;3@? 
z8UlkO1Q`GS|NZ}e{r|IP&g`8v69jhJ3t2KSBqk-Nq^6~3WM(lz9eVbR&pwEXllHF!tEB26#8UflRuum>Vt^wL7kkTPbNo4vQ0>x*{@XeOw2FC3t zbk*+0-MEeK-tX-IDtiItOPSH7nZUJUZ*R98ch7PN^fU^77CxEbP{HA@jLC4gKAm2#y3-*53Ie^ow`fKalMfpRr6ob!LdB|E2stn%HOCt z9@6J*ke!4B_oJu=1{@R!3T&qW_MF(v4dwMS%NX)M=f-x56?PpI2nqxRf&xK-pg>Sy z+Z3qO9sSJ^MpG*2NHJmR3w$IApnskurpQZdy;SSpTEEt(>-DFjL)$(+$P|3Den^L- zgpDUt{_H|2>gjs##07rI0uTNQwB_Db>ryK^h)mE)FKb`jjBl$m0?JkZQi;?q(A&zr L*FN2F%hL28jjHT} literal 0 HcmV?d00001 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 6839e3ed0bbea..783e06c9b7f2e 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1853,3 +1853,13 @@ def test_writer_118_exceptions(self): with tm.ensure_clean() as path: with pytest.raises(ValueError, match="You must use version 119"): StataWriterUTF8(path, df, version=118) + + +@pytest.mark.parametrize("version", [105, 108, 111, 113, 114]) +def test_backward_compat(version, datapath): + data_base = datapath("io", "data", "stata") + ref = os.path.join(data_base, f"stata-compat-118.dta") + old = os.path.join(data_base, f"stata-compat-{version}.dta") + expected = pd.read_stata(ref) + old_dta = pd.read_stata(old) + tm.assert_frame_equal(old_dta, expected, check_dtype=False)