From 65bbb14585c87e4f7f98e47346ef16821eec729e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 11 Dec 2021 17:11:05 -0500 Subject: [PATCH 1/2] TYP: stricter type for compression --- pandas/_typing.py | 4 +++- pandas/io/parsers/readers.py | 5 +++-- pandas/io/xml.py | 12 ++++++------ pandas/tests/io/xml/test_to_xml.py | 5 ++++- pandas/tests/io/xml/test_xml.py | 5 ++++- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 2ca6065e9898d..95277e97eae98 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -242,7 +242,9 @@ def closed(self) -> bool: # compression keywords and compression CompressionDict = Dict[str, Any] -CompressionOptions = Optional[Union[str, CompressionDict]] +CompressionOptions = Optional[ + Union[Literal["infer", "gzip", "bz2", "zip", "xz"], CompressionDict] +] # types in DataFrameFormatter diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 82f8ee553df8e..a35970597c87d 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -19,6 +19,7 @@ from pandas._libs.parsers import STR_NA_VALUES from pandas._typing import ( ArrayLike, + CompressionOptions, DtypeArg, FilePath, ReadCsvBuffer, @@ -618,7 +619,7 @@ def read_csv( iterator=False, chunksize=None, # Quoting, Compression, and File Format - compression="infer", + compression: CompressionOptions = "infer", thousands=None, decimal: str = ".", lineterminator=None, @@ -716,7 +717,7 @@ def read_table( iterator=False, chunksize=None, # Quoting, Compression, and File Format - compression="infer", + compression: CompressionOptions = "infer", thousands=None, decimal: str = ".", lineterminator=None, diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 3c3b4afa2c57d..a54546a37f284 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -105,8 +105,8 @@ def __init__( names, encoding, stylesheet, - compression, - storage_options, + compression: CompressionOptions, + storage_options: StorageOptions, ) -> None: self.path_or_buffer = path_or_buffer self.xpath = xpath @@ -570,8 +570,8 @@ def _transform_doc(self) -> bytes: def get_data_from_filepath( filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str], encoding, - compression, - storage_options, + compression: CompressionOptions, + storage_options: StorageOptions, ) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]: """ Extract raw XML data. @@ -666,8 +666,8 @@ def _parse( encoding, parser, stylesheet, - compression, - storage_options, + compression: CompressionOptions, + storage_options: StorageOptions, **kwargs, ) -> DataFrame: """ diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index b8d146c597d2c..5b839d4a436f4 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -1311,7 +1311,10 @@ def test_filename_and_suffix_comp(parser, comp, compfile): def test_unsuported_compression(datapath, parser): with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean() as path: - geom_df.to_xml(path, parser=parser, compression="7z") + # Argument "compression" to "to_xml" of "DataFrame" has incompatible type + # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'], + # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]" + geom_df.to_xml(path, parser=parser, compression="7z") # type: ignore[arg-type] # STORAGE OPTIONS diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 70a75bd34be71..2e718073c4174 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1069,7 +1069,10 @@ def test_wrong_compression_zip(parser, comp): def test_unsuported_compression(datapath, parser): with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean() as path: - read_xml(path, parser=parser, compression="7z") + # error: Argument "compression" to "read_xml" has incompatible type + # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'], + # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]" + read_xml(path, parser=parser, compression="7z") # type: ignore[arg-type] # STORAGE OPTIONS From 682ea2bf1f0c1a86d32383692f8d8801688a904c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 11 Dec 2021 17:19:36 -0500 Subject: [PATCH 2/2] line length --- pandas/tests/io/xml/test_to_xml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 5b839d4a436f4..eea6c535f12b6 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -1314,7 +1314,9 @@ def test_unsuported_compression(datapath, parser): # Argument "compression" to "to_xml" of "DataFrame" has incompatible type # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'], # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]" - geom_df.to_xml(path, parser=parser, compression="7z") # type: ignore[arg-type] + geom_df.to_xml( + path, parser=parser, compression="7z" # type: ignore[arg-type] + ) # STORAGE OPTIONS