From e45e23be979e69c47fdfdce1d6d9d62f59251497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 21 Apr 2024 08:55:40 -0400 Subject: [PATCH 1/2] TYP: export SASReader in pandas.api.typing --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/api/typing/__init__.py | 2 ++ pandas/io/sas/sas7bdat.py | 5 ++--- pandas/io/sas/sas_xport.py | 5 ++--- pandas/io/sas/sasreader.py | 13 +++++++------ 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 8618d7d525771..c817e09b3b360 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -29,6 +29,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`) +- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) diff --git a/pandas/api/typing/__init__.py b/pandas/api/typing/__init__.py index df6392bf692a2..c58fa0f085266 100644 --- a/pandas/api/typing/__init__.py +++ b/pandas/api/typing/__init__.py @@ -30,6 +30,7 @@ # TODO: Can't import Styler without importing jinja2 # from pandas.io.formats.style import Styler from pandas.io.json._json import JsonReader +from pandas.io.sas.sasreader import SASReader from pandas.io.stata import StataReader __all__ = [ @@ -49,6 +50,7 @@ "RollingGroupby", "SeriesGroupBy", "StataReader", + "SASReader", # See TODO above # "Styler", "TimedeltaIndexResamplerGroupby", diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 6a392a0f02caf..25257d5fcc192 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -16,7 +16,6 @@ from __future__ import annotations -from collections import abc from datetime import datetime import sys from typing import TYPE_CHECKING @@ -45,7 +44,7 @@ from pandas.io.common import get_handle import pandas.io.sas.sas_constants as const -from pandas.io.sas.sasreader import ReaderBase +from pandas.io.sas.sasreader import SASReader if TYPE_CHECKING: from pandas._typing import ( @@ -116,7 +115,7 @@ def __init__( # SAS7BDAT represents a SAS data file in SAS7BDAT format. -class SAS7BDATReader(ReaderBase, abc.Iterator): +class SAS7BDATReader(SASReader): """ Read SAS files in SAS7BDAT format. diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index adba9bf117a8e..89dbdab64c23c 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -10,7 +10,6 @@ from __future__ import annotations -from collections import abc from datetime import datetime import struct from typing import TYPE_CHECKING @@ -24,7 +23,7 @@ import pandas as pd from pandas.io.common import get_handle -from pandas.io.sas.sasreader import ReaderBase +from pandas.io.sas.sasreader import SASReader if TYPE_CHECKING: from pandas._typing import ( @@ -252,7 +251,7 @@ def _parse_float_vec(vec): return ieee -class XportReader(ReaderBase, abc.Iterator): +class XportReader(SASReader): __doc__ = _xport_reader_doc def __init__( diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 69d911863338f..12d698a4f76a8 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -8,6 +8,7 @@ ABC, abstractmethod, ) +from collections.abc import Iterator from typing import ( TYPE_CHECKING, overload, @@ -33,9 +34,9 @@ from pandas import DataFrame -class ReaderBase(ABC): +class SASReader(Iterator["DataFrame"], ABC): """ - Protocol for XportReader and SAS7BDATReader classes. + Abstract class for XportReader and SAS7BDATReader. """ @abstractmethod @@ -66,7 +67,7 @@ def read_sas( chunksize: int = ..., iterator: bool = ..., compression: CompressionOptions = ..., -) -> ReaderBase: ... +) -> SASReader: ... @overload @@ -79,7 +80,7 @@ def read_sas( chunksize: None = ..., iterator: bool = ..., compression: CompressionOptions = ..., -) -> DataFrame | ReaderBase: ... +) -> DataFrame | SASReader: ... @doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer") @@ -92,7 +93,7 @@ def read_sas( chunksize: int | None = None, iterator: bool = False, compression: CompressionOptions = "infer", -) -> DataFrame | ReaderBase: +) -> DataFrame | SASReader: """ Read SAS files stored as either XPORT or SAS7BDAT format files. @@ -145,7 +146,7 @@ def read_sas( f"unable to infer format of SAS file from filename: {fname!r}" ) - reader: ReaderBase + reader: SASReader if format.lower() == "xport": from pandas.io.sas.sas_xport import XportReader From 7796e960d52ec30b504b183b43d6a57162b787f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 21 Apr 2024 09:43:48 -0400 Subject: [PATCH 2/2] fix test --- pandas/tests/api/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 0f2a641d13b11..b23876d9280f7 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -267,6 +267,7 @@ class TestApi(Base): "RollingGroupby", "SeriesGroupBy", "StataReader", + "SASReader", "TimedeltaIndexResamplerGroupby", "TimeGrouper", "Window",