From e380844fcc28763c3777fde735634737544aec54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 17 Jun 2022 20:46:15 -0400 Subject: [PATCH 1/4] TYP: read_sas --- pandas/io/sas/sas7bdat.py | 16 ++++++++-------- pandas/io/sas/sas_xport.py | 4 ++-- pandas/io/sas/sasreader.py | 8 ++++---- pyright_reportGeneralTypeIssues.json | 3 +-- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index a992c1af5ddaf..c8ade1d59472a 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -163,12 +163,12 @@ def __init__( self, path_or_buf: FilePath | ReadBuffer[bytes], index=None, - convert_dates=True, - blank_missing=True, - chunksize=None, - encoding=None, - convert_text=True, - convert_header_text=True, + convert_dates: bool = True, + blank_missing: bool = True, + chunksize: int | None = None, + encoding: str | None = None, + convert_text: bool = True, + convert_header_text: bool = True, compression: CompressionOptions = "infer", ) -> None: @@ -732,7 +732,7 @@ def _process_format_subheader(self, offset: int, length: int) -> None: self.column_formats.append(column_format) self.columns.append(col) - def read(self, nrows: int | None = None) -> DataFrame | None: + def read(self, nrows: int | None = None) -> DataFrame: if (nrows is None) and (self.chunksize is not None): nrows = self.chunksize @@ -744,7 +744,7 @@ def read(self, nrows: int | None = None) -> DataFrame | None: raise EmptyDataError("No columns to parse from file") if nrows > 0 and self._current_row_in_file_index >= self.row_count: - return None + return pd.DataFrame() m = self.row_count - self._current_row_in_file_index if nrows > m: diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index db09983cacfbc..500e88eb0ef76 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -280,7 +280,7 @@ def __init__( self.close() raise - def close(self): + def close(self) -> None: 
self.handles.close() def _get_row(self): @@ -463,7 +463,7 @@ def _missing_double(self, vec): return miss @Appender(_read_method_doc) - def read(self, nrows=None): + def read(self, nrows: int | None = None) -> pd.DataFrame: if nrows is None: nrows = self.nobs diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index ff50df886e627..052e674d1a488 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -38,17 +38,17 @@ class ReaderBase(metaclass=ABCMeta): """ @abstractmethod - def read(self, nrows=None): + def read(self, nrows: int | None = None) -> DataFrame: pass @abstractmethod - def close(self): + def close(self) -> None: pass - def __enter__(self): + def __enter__(self) -> ReaderBase: return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, traceback) -> None: self.close() diff --git a/pyright_reportGeneralTypeIssues.json b/pyright_reportGeneralTypeIssues.json index f6f9bed1af065..83d76e752a057 100644 --- a/pyright_reportGeneralTypeIssues.json +++ b/pyright_reportGeneralTypeIssues.json @@ -105,8 +105,7 @@ "pandas/io/parsers/base_parser.py", "pandas/io/parsers/c_parser_wrapper.py", "pandas/io/pytables.py", - "pandas/io/sas/sas7bdat.py", - "pandas/io/sas/sasreader.py", + "pandas/io/sas/sas_xport.py", "pandas/io/sql.py", "pandas/io/stata.py", "pandas/io/xml.py", From ec260b1297b7de77d3f6c75205f78d85e62ef4b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 18 Jun 2022 08:30:43 -0400 Subject: [PATCH 2/4] make __next__ compatible with empty DataFrame --- pandas/io/sas/sas7bdat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index c8ade1d59472a..12473ffafa6b0 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -361,9 +361,9 @@ def _get_properties(self) -> None: self.encoding or self.default_encoding ) - def __next__(self): + def __next__(self) -> 
DataFrame: da = self.read(nrows=self.chunksize or 1) - if da is None: + if df.empty: self.close() raise StopIteration return da @@ -744,7 +744,7 @@ def read(self, nrows: int | None = None) -> DataFrame: raise EmptyDataError("No columns to parse from file") if nrows > 0 and self._current_row_in_file_index >= self.row_count: - return pd.DataFrame() + return DataFrame() m = self.row_count - self._current_row_in_file_index if nrows > m: From cc50ff1f841e1248578e13a5bd0c61b71c1d3631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 18 Jun 2022 10:05:41 -0400 Subject: [PATCH 3/4] df -> da --- pandas/io/sas/sas7bdat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 12473ffafa6b0..5298178b4efcd 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -363,7 +363,7 @@ def _get_properties(self) -> None: def __next__(self) -> DataFrame: da = self.read(nrows=self.chunksize or 1) - if df.empty: + if da.empty: self.close() raise StopIteration return da From 2deb5b7dba4a36e8ba1ed08bc6728b88e0a4305e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 18 Jun 2022 16:14:37 -0400 Subject: [PATCH 4/4] whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 76f6e864a174f..c850664879a5b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -461,6 +461,7 @@ Other API changes October 2022. (:issue:`46312`) - :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. 
(:issue:`29102`) - Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` instead raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` where appropriate (:issue:`47268`) +- When :func:`read_sas` previously returned ``None``, it now returns an empty :class:`DataFrame` (:issue:`47410`) - .. ---------------------------------------------------------------------------