pandas-dev
diff --git a/‎doc/source/conf.py
+1-1 b/‎doc/source/conf.py
+1-1
diff --git a/‎environment.yml
+1-1 b/‎environment.yml
+1-1
diff --git a/‎pandas/_testing/_io.py
+2-4 b/‎pandas/_testing/_io.py
+2-4
diff --git a/‎pandas/conftest.py
+17-9 b/‎pandas/conftest.py
+17-9
diff --git a/‎pandas/core/describe.py
+1-1 b/‎pandas/core/describe.py
+1-1
diff --git a/‎pandas/core/frame.py
+6-19 b/‎pandas/core/frame.py
+6-19
diff --git a/‎pandas/core/shared_docs.py
+29 b/‎pandas/core/shared_docs.py
+29
diff --git a/‎pandas/io/common.py
+27-31 b/‎pandas/io/common.py
+27-31
diff --git a/‎pandas/io/formats/xml.py
+6-3 b/‎pandas/io/formats/xml.py
+6-3
diff --git a/‎pandas/io/json/_json.py
+6-7 b/‎pandas/io/json/_json.py
+6-7
@@ -215,7 +215,7 @@
 
 # The theme to use for HTML and HTML Help pages.  Major themes that come with
 # Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = "pydata_sphinx_theme"
+#html_theme = "pydata_sphinx_theme"
 
 # The style sheet to use for HTML and HTML Help pages. A file of that name
 # must exist either in Sphinx' static/ path, or in one of the custom paths
 
@@ -38,7 +38,7 @@ dependencies:
   # documentation (jupyter notebooks)
   - nbconvert>=5.4.1
   - nbsphinx
-  - pandoc
+  #- pandoc
 
   # Dask and its dependencies (that dont install with dask)
   - dask-core
 
@@ -15,6 +15,7 @@
     ReadPickleBuffer,
 )
 from pandas.compat import get_lzma_file
+from pandas.compat._optional import import_optional_dependency
 
 import pandas as pd
 from pandas._testing._random import rands
@@ -392,10 +393,7 @@ def write_to_compressed(compression, path, data, dest="test"):
     elif compression == "bz2":
         compress_method = bz2.BZ2File
     elif compression == "zstd":
-        import pytest
-
-        zstd = pytest.importorskip("zstandard")
-        compress_method = zstd.open
+        compress_method = import_optional_dependency("zstandard").open
     elif compression == "xz":
         compress_method = get_lzma_file()
     else:
 
@@ -44,13 +44,6 @@
     utc,
 )
 
-try:
-    import zstandard as zstd  # noqa: F401
-
-    have_zstd = True
-except ImportError:
-    have_zstd = False
-
 import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.dtypes import (
@@ -275,7 +268,14 @@ def other_closed(request):
 
 
 @pytest.fixture(
-    params=[None, "gzip", "bz2", "zip", "xz"] + (["zstd"] if have_zstd else [])
+    params=[
+        None,
+        "gzip",
+        "bz2",
+        "zip",
+        "xz",
+        pytest.param("zstd", marks=td.skip_if_no("zstandard")),
+    ]
 )
 def compression(request):
     """
@@ -284,7 +284,15 @@ def compression(request):
     return request.param
 
 
-@pytest.fixture(params=["gzip", "bz2", "zip", "xz"] + (["zstd"] if have_zstd else []))
+@pytest.fixture(
+    params=[
+        "gzip",
+        "bz2",
+        "zip",
+        "xz",
+        pytest.param("zstd", marks=td.skip_if_no("zstandard")),
+    ]
+)
 def compression_only(request):
     """
     Fixture for trying common compression types in compression tests excluding
 
@@ -35,7 +35,6 @@
 
 from pandas.core.reshape.concat import concat
 
-from pandas.io.formats.format import format_percentiles
 
 if TYPE_CHECKING:
     from pandas import (
@@ -229,6 +228,7 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
         The percentiles to include in the output.
     """
     from pandas import Series
+    from pandas.io.formats.format import format_percentiles
 
     # error: Argument 1 to "format_percentiles" has incompatible type "Sequence[float]";
     # expected "Union[ndarray, List[Union[int, float]], List[float], List[Union[str,
 
@@ -2486,7 +2486,8 @@ def _from_arrays(
         )
         return cls(mgr)
 
-    @doc(storage_options=generic._shared_docs["storage_options"])
+    @doc(storage_options=generic._shared_docs["storage_options"],
+         compression_options=generic._shared_docs["compression_options"] % "path")
     @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_stata(
         self,
@@ -2565,16 +2566,7 @@ def to_stata(
             format. Only available if version is 117.  Storing strings in the
             StrL format can produce smaller dta files if strings have more than
             8 characters and values are repeated.
-        compression : str or dict, default 'infer'
-            For on-the-fly compression of the output dta. If string, specifies
-            compression mode. If dict, value at key 'method' specifies
-            compression mode. Compression mode must be one of {{'infer', 'gzip',
-            'bz2', 'zip', 'xz', 'zstd', None}}. If compression mode is 'infer' and
-            `fname` is path-like, then detect compression from the following
-            extensions: '.gz', '.bz2', '.zip', '.xz', '.zst' (otherwise no
-            compression). If dict and compression mode is one of {{'zip',
-            'gzip', 'bz2', 'zstd'}}, or inferred as one of the above, other entries
-            passed as additional compression options.
+        {compression_options}
 
             .. versionadded:: 1.1.0
 
@@ -2943,7 +2935,8 @@ def to_html(
             render_links=render_links,
         )
 
-    @doc(storage_options=generic._shared_docs["storage_options"])
+    @doc(storage_options=generic._shared_docs["storage_options"],
+         compression_options=generic._shared_docs["compression_options"] % "path_or_buffer")
     def to_xml(
         self,
         path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
@@ -3020,13 +3013,7 @@ def to_xml(
             layout of elements and attributes from original output. This
             argument requires ``lxml`` to be installed. Only XSLT 1.0
             scripts and not later versions is currently supported.
-        compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}},
-            default 'infer'.
-            For on-the-fly decompression of on-disk data. If 'infer', then use
-            gzip, bz2, zip, xz, or zstandard if path_or_buffer is a string ending in
-            '.gz', '.bz2', '.zip', '.xz', '.zst' respectively, and no decompression
-            otherwise. If using 'zip', the ZIP file must contain only one data
-            file to be read in. Set to None for no decompression.
+        {compression_options}
         {storage_options}
 
         Returns
 
@@ -402,6 +402,35 @@
     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details."""
 
+_shared_docs[
+    "compression_options"
+] = """compression : str or dict, default 'infer'
+    For on-the-fly compression of the output data. If 'infer' and '%s' is
+    path-like, then detect compression from the following extensions: '.gz',
+    '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). Set to
+    ``None`` for no compression. Can also be a dict with key ``'method'`` set
+    to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
+    key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
+    ``bz2.BZ2File``, or ``zstandard.ZstdCompressor``, respectively. As an
+    example, the following could be passed for faster compression and to create
+    a reproducible gzip archive: ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
+"""
+
+_shared_docs[
+    "decompression_options"
+] = """compression : str or dict, default 'infer'
+    For on-the-fly decompression of on-disk data. If 'infer' and '%s' is
+    path-like, then detect compression from the following extensions: '.gz',
+    '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using
+    'zip', the ZIP file must contain only one data file to be read in. Set to
+    ``None`` for no decompression. Can also be a dict with key ``'method'`` set
+    to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
+    key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
+    ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
+    example, the following could be passed for Zstandard decompression using a
+    custom compression dictionary: ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
+"""
+
 _shared_docs[
     "replace"
 ] = """
 
@@ -40,6 +40,7 @@
 import warnings
 import zipfile
 
+from pandas.util._decorators import doc
 from pandas._typing import (
     BaseBuffer,
     CompressionDict,
@@ -54,22 +55,13 @@
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import is_file_like
+from pandas.core import generic
 
 _VALID_URLS = set(uses_relative + uses_netloc + uses_params)
 _VALID_URLS.discard("")
 
 BaseBufferT = TypeVar("BaseBufferT", bound=BaseBuffer)
 
-# For the _is_binary_mode, we need to get python-zstandard's reader class because
-# it doesn't use any of the builtin base classes (such as RawIOBase).
-# Unfortunately python-zstandard doesn't expose that particular class, so we have
-# to get it through `zstd.open`.
-try:
-    with import_optional_dependency("zstandard").open(io.BytesIO()) as reader:
-        _ZstdDecompressorReader: type | None = type(reader)
-except ImportError:
-    _ZstdDecompressorReader = None
-
 
 @dataclasses.dataclass
 class IOArgs:
@@ -257,6 +249,7 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
     )
 
 
+@doc(compression_options=generic._shared_docs["compression_options"] % "filepath_or_buffer")
 def _get_filepath_or_buffer(
     filepath_or_buffer: FilePath | BaseBuffer,
     encoding: str = "utf-8",
@@ -272,7 +265,7 @@ def _get_filepath_or_buffer(
     ----------
     filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                          or buffer
-    compression : {{'gzip', 'bz2', 'zip', 'xz', 'zstd', None}}, optional
+    {compression_options}
     encoding : the encoding to use to decode bytes, default is 'utf-8'
     mode : str, optional
 
@@ -499,6 +492,7 @@ def get_compression_method(
     return compression_method, compression_args
 
 
+@doc(compression_options=generic._shared_docs["compression_options"] % "filepath_or_buffer")
 def infer_compression(
     filepath_or_buffer: FilePath | BaseBuffer, compression: str | None
 ) -> str | None:
@@ -512,10 +506,7 @@ def infer_compression(
     ----------
     filepath_or_buffer : str or file handle
         File path or object.
-    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
-        If 'infer' and `filepath_or_buffer` is path-like, then detect
-        compression from the following extensions: '.gz', '.bz2', '.zip',
-        '.xz', or '.zst' (otherwise no compression).
+    {compression_options}
 
     Returns
     -------
@@ -603,6 +594,7 @@ def get_handle(
     ...
 
 
+@doc(compression_options=generic._shared_docs["compression_options"] % "path_or_buf")
 def get_handle(
     path_or_buf: FilePath | BaseBuffer,
     mode: str,
@@ -625,15 +617,7 @@ def get_handle(
         Mode to open path_or_buf with.
     encoding : str or None
         Encoding to use.
-    compression : str or dict, default None
-        If string, specifies compression mode. If dict, value at key 'method'
-        specifies compression mode. Compression mode must be one of {'infer',
-        'gzip', 'bz2', 'zip', 'xz', 'zstd', None}. If compression mode is
-        'infer' and `filepath_or_buffer` is path-like, then detect compression
-        from the following extensions: '.gz', '.bz2', '.zip', '.xz', or '.zst'
-        (otherwise no compression). If dict and compression mode is one of
-        {'zip', 'gzip', 'bz2', 'zstd'}, or inferred as one of the above,
-        other entries passed as additional compression options.
+    {compression_options}
 
         .. versionchanged:: 1.0.0
 
@@ -1117,11 +1101,23 @@ def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool:
     if issubclass(type(handle), text_classes):
         return False
 
-    # classes that expect bytes
-    binary_classes: list[type] = [BufferedIOBase, RawIOBase]
-    # Zstandard doesn't use any of the builtin base classes
-    if _ZstdDecompressorReader is not None:
-        binary_classes.append(_ZstdDecompressorReader)
-    is_binary_class = isinstance(handle, tuple(binary_classes))
+    return isinstance(handle, _get_binary_io_classes()) or "b" in getattr(
+        handle, "mode", mode
+    )
+
+
+def _get_binary_io_classes() -> tuple[type]:
+    """IO classes that expect bytes"""
+    binary_classes: tuple[type] = (BufferedIOBase, RawIOBase)
+
+    # python-zstandard doesn't use any of the builtin base classes; instead we
+    # have to use the `zstd.ZstdDecompressionReader` class for isinstance checks.
+    # Unfortunately `zstd.ZstdDecompressionReader` isn't exposed by python-zstandard
+    # so we have to get it from a `zstd.ZstdDecompressor` instance.
+    # See also https://github.com/indygreg/python-zstandard/pull/165.
+    zstd = import_optional_dependency("zstandard", errors="ignore")
+    if zstd is not None:
+        with zstd.ZstdDecompressor().stream_reader(b"") as reader:
+            binary_classes += (type(reader),)
 
-    return is_binary_class or "b" in getattr(handle, "mode", mode)
+    return binary_classes
@@ -25,8 +25,13 @@
     get_data_from_filepath,
     preprocess_data,
 )
+from pandas.util._decorators import doc
 
+from pandas.core import generic
 
+
+
+@doc(compression_options=generic._shared_docs["compression_options"] % "path_or_buffer")
 class BaseXMLFormatter:
     """
     Subclass for formatting data in XML.
@@ -74,9 +79,7 @@ class BaseXMLFormatter:
     stylesheet : str or file-like
         A URL, file, file-like object, or a raw string containing XSLT.
 
-    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}, default 'infer'
-        Compression type for on-the-fly decompression of on-disk data.
-        If 'infer', then use extension for gzip, bz2, zip, xz, or zstandard.
+    {compression_options}
 
     storage_options : dict, optional
         Extra options that make sense for a particular storage connection,
 
@@ -64,6 +64,9 @@
     parse_table_schema,
 )
 from pandas.io.parsers.readers import validate_integer
+from pandas.util._decorators import doc
+
+from pandas.core import generic
 
 loads = json.loads
 dumps = json.dumps
@@ -314,7 +317,8 @@ def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
         return {"schema": self.schema, "data": self.obj}
 
 
-@doc(storage_options=generic._shared_docs["storage_options"])
+@doc(storage_options=generic._shared_docs["storage_options"],
+     decompression_options=generic._shared_docs["decompression_options"] % "path_or_buf")
 @deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
 @deprecate_nonkeyword_arguments(
     version="2.0", allowed_args=["path_or_buf"], stacklevel=3
@@ -475,12 +479,7 @@ def read_json(
 
            ``JsonReader`` is a context manager.
 
-    compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}}, default 'infer'
-        For on-the-fly decompression of on-disk data. If 'infer', then use
-        gzip, bz2, zip, xz, zstandard if path_or_buf is a string ending in
-        '.gz', '.bz2', '.zip', '.xz', or '.zst' respectively, and no decompression
-        otherwise. If using 'zip', the ZIP file must contain only one data
-        file to be read in. Set to None for no decompression.
+    {decompression_options}
 
     nrows : int, optional
         The number of lines from the line-delimited jsonfile that has to be read.