From bf31013a07f5edd84ec1dba7d96d3bd1e6430bd2 Mon Sep 17 00:00:00 2001 From: Jonas Haag Date: Wed, 25 May 2022 14:14:32 +0200 Subject: [PATCH 1/3] Fix reading SAS7BDAT files with zero rows --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/io/sas/sas7bdat.py | 2 +- pandas/tests/io/sas/test_sas7bdat.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 4a0b9a97a9d11..af68ffb3b3c66 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -814,6 +814,7 @@ I/O - Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`) - Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`) - :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``. +- Bug in :func:`read_sas` where ``None`` was returned rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`) - Period diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index debd686475432..1e071690d35fb 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -737,7 +737,7 @@ def read(self, nrows: int | None = None) -> DataFrame | None: self.close() raise EmptyDataError("No columns to parse from file") - if self._current_row_in_file_index >= self.row_count: + if nrows > 0 and self._current_row_in_file_index >= self.row_count: return None m = self.row_count - self._current_row_in_file_index diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 0dd1fa175fa3f..92d13c8878159 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -216,6 +216,15 @@ def test_zero_variables(datapath): pd.read_sas(fname) +def test_zero_rows(datapath): + # GH 18198 + fname = datapath("io", "sas", "data", "zero_rows.sas7bdat") + df = pd.read_sas(fname) + 
tm.assert_frame_equal( + df, pd.DataFrame([{"char_field": "a", "num_field": 1.0}]).iloc[:0] + ) + + def test_corrupt_read(datapath): # We don't really care about the exact failure, the important thing is # that the resource should be cleaned up afterwards (BUG #35566) From cfa1a3ecf867db4ebac624044be3b7aa2d0312f8 Mon Sep 17 00:00:00 2001 From: Jonas Haag Date: Fri, 27 May 2022 17:32:03 +0200 Subject: [PATCH 2/3] Add missing file --- pandas/tests/io/sas/data/zero_rows.sas7bdat | Bin 0 -> 262144 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/sas/data/zero_rows.sas7bdat diff --git a/pandas/tests/io/sas/data/zero_rows.sas7bdat b/pandas/tests/io/sas/data/zero_rows.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..a5ba95b6395076990bfa83a9fde0f609b23369b5 GIT binary patch literal 262144 zcmeIzO>10L7y#fild98@MT1=g5od@lvd|%uN#h3vJJ=|N)`UzzaWje^w4hxC3dN1M z62z6@5AY9&;MRh=PKGJ_m;Lp7*@>oO7SKb0>td zeewOH^T9+I`1tsPt120L$_P2cWUFZnc7soTYIn@mO^OFj7$2xe(ULT z=ht6Y-`W^#Z9LZ>Ji9S?YU9G@)_T9+D%*I;1A?=??Os-$O@I0AN7tTt=eu6&^GjNv zo+F3{ZF+54*_?*CjX()HOeJBqH~EXGSS)778F z{K9lPT--0}pRZ3(EX4d~O2=(}xKRWTSMndn{OL;m&6r=S_!^QCRTiF@ziwCKPdZ>*)KQ=#H31K*_PaW`PylN?C8?m3s`bJrs=Nq*+ zTXgZ#;leokP?*i*<#2qQD}p~_@#%?ttJ2Q)wtM~U_05+rzO*xV?b55SzY@abD{rK5 z;>uAN51!UL7H997uDWBRxwvrhN)mU+9jfDWzbT*2R~;wAl)tO8UK-zug{dyxXLTGO z!r@Ujz8}{+s_w>Z)*ZFZe`WrcQOnL+KKaGTe;u_f`l%R(x!9*}et9tuPo%^FeRbXR zE4_DIKV9);vzSQFN$SVtfQ91W^2`7L literal 0 HcmV?d00001 From 1ff403787a77bb430708d9a81423d6b8956f20a5 Mon Sep 17 00:00:00 2001 From: Jonas Haag Date: Tue, 31 May 2022 13:54:56 +0200 Subject: [PATCH 3/3] Update test_sas7bdat.py --- pandas/tests/io/sas/test_sas7bdat.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 92d13c8878159..2b3ccf57d3959 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -219,10 +219,9 @@ 
def test_zero_variables(datapath): def test_zero_rows(datapath): # GH 18198 fname = datapath("io", "sas", "data", "zero_rows.sas7bdat") - df = pd.read_sas(fname) - tm.assert_frame_equal( - df, pd.DataFrame([{"char_field": "a", "num_field": 1.0}]).iloc[:0] - ) + result = pd.read_sas(fname) + expected = pd.DataFrame([{"char_field": "a", "num_field": 1.0}]).iloc[:0] + tm.assert_frame_equal(result, expected) def test_corrupt_read(datapath):