From 65bbb14585c87e4f7f98e47346ef16821eec729e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Sat, 11 Dec 2021 17:11:05 -0500
Subject: [PATCH 1/2] TYP: stricter type for compression

---
 pandas/_typing.py                  |  4 +++-
 pandas/io/parsers/readers.py       |  5 +++--
 pandas/io/xml.py                   | 12 ++++++------
 pandas/tests/io/xml/test_to_xml.py |  5 ++++-
 pandas/tests/io/xml/test_xml.py    |  5 ++++-
 5 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 2ca6065e9898d..95277e97eae98 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -242,7 +242,9 @@ def closed(self) -> bool:
 
 # compression keywords and compression
 CompressionDict = Dict[str, Any]
-CompressionOptions = Optional[Union[str, CompressionDict]]
+CompressionOptions = Optional[
+    Union[Literal["infer", "gzip", "bz2", "zip", "xz"], CompressionDict]
+]
 
 
 # types in DataFrameFormatter
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 82f8ee553df8e..a35970597c87d 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -19,6 +19,7 @@
 from pandas._libs.parsers import STR_NA_VALUES
 from pandas._typing import (
     ArrayLike,
+    CompressionOptions,
     DtypeArg,
     FilePath,
     ReadCsvBuffer,
@@ -618,7 +619,7 @@ def read_csv(
     iterator=False,
     chunksize=None,
     # Quoting, Compression, and File Format
-    compression="infer",
+    compression: CompressionOptions = "infer",
     thousands=None,
     decimal: str = ".",
     lineterminator=None,
@@ -716,7 +717,7 @@ def read_table(
     iterator=False,
     chunksize=None,
     # Quoting, Compression, and File Format
-    compression="infer",
+    compression: CompressionOptions = "infer",
     thousands=None,
     decimal: str = ".",
     lineterminator=None,
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 3c3b4afa2c57d..a54546a37f284 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -105,8 +105,8 @@ def __init__(
         names,
         encoding,
         stylesheet,
-        compression,
-        storage_options,
+        compression: CompressionOptions,
+        storage_options: StorageOptions,
     ) -> None:
         self.path_or_buffer = path_or_buffer
         self.xpath = xpath
@@ -570,8 +570,8 @@ def _transform_doc(self) -> bytes:
 def get_data_from_filepath(
     filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str],
     encoding,
-    compression,
-    storage_options,
+    compression: CompressionOptions,
+    storage_options: StorageOptions,
 ) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]:
     """
     Extract raw XML data.
@@ -666,8 +666,8 @@ def _parse(
     encoding,
     parser,
     stylesheet,
-    compression,
-    storage_options,
+    compression: CompressionOptions,
+    storage_options: StorageOptions,
     **kwargs,
 ) -> DataFrame:
     """
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index b8d146c597d2c..5b839d4a436f4 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -1311,7 +1311,10 @@ def test_filename_and_suffix_comp(parser, comp, compfile):
 def test_unsuported_compression(datapath, parser):
     with pytest.raises(ValueError, match="Unrecognized compression type"):
         with tm.ensure_clean() as path:
-            geom_df.to_xml(path, parser=parser, compression="7z")
+            # Argument "compression" to "to_xml" of "DataFrame" has incompatible type
+            # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
+            # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
+            geom_df.to_xml(path, parser=parser, compression="7z")  # type: ignore[arg-type]
 
 
 # STORAGE OPTIONS
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 70a75bd34be71..2e718073c4174 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1069,7 +1069,10 @@ def test_wrong_compression_zip(parser, comp):
 def test_unsuported_compression(datapath, parser):
     with pytest.raises(ValueError, match="Unrecognized compression type"):
         with tm.ensure_clean() as path:
-            read_xml(path, parser=parser, compression="7z")
+            # error: Argument "compression" to "read_xml" has incompatible type
+            # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
+            # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
+            read_xml(path, parser=parser, compression="7z")  # type: ignore[arg-type]
 
 
 # STORAGE OPTIONS

From 682ea2bf1f0c1a86d32383692f8d8801688a904c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <twoertwein@gmail.com>
Date: Sat, 11 Dec 2021 17:19:36 -0500
Subject: [PATCH 2/2] CLN: wrap to_xml call in test to fit line length limit

---
 pandas/tests/io/xml/test_to_xml.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 5b839d4a436f4..eea6c535f12b6 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -1314,7 +1314,9 @@ def test_unsuported_compression(datapath, parser):
             # Argument "compression" to "to_xml" of "DataFrame" has incompatible type
             # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
             # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
-            geom_df.to_xml(path, parser=parser, compression="7z")  # type: ignore[arg-type]
+            geom_df.to_xml(
+                path, parser=parser, compression="7z"  # type: ignore[arg-type]
+            )
 
 
 # STORAGE OPTIONS