Skip to content

Commit 2334af8

Browse files
committed
Extract 'compression' arg docs to shared docs
1 parent 9e19ad3 commit 2334af8

File tree

18 files changed

+179
-196
lines changed

18 files changed

+179
-196
lines changed

doc/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@
215215

216216
# The theme to use for HTML and HTML Help pages. Major themes that come with
217217
# Sphinx are currently 'default' and 'sphinxdoc'.
218-
html_theme = "pydata_sphinx_theme"
218+
#html_theme = "pydata_sphinx_theme"
219219

220220
# The style sheet to use for HTML and HTML Help pages. A file of that name
221221
# must exist either in Sphinx' static/ path, or in one of the custom paths

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ dependencies:
3838
# documentation (jupyter notebooks)
3939
- nbconvert>=5.4.1
4040
- nbsphinx
41-
- pandoc
41+
#- pandoc
4242

4343
# Dask and its dependencies (that don't install with dask)
4444
- dask-core

pandas/_testing/_io.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
ReadPickleBuffer,
1616
)
1717
from pandas.compat import get_lzma_file
18+
from pandas.compat._optional import import_optional_dependency
1819

1920
import pandas as pd
2021
from pandas._testing._random import rands
@@ -392,10 +393,7 @@ def write_to_compressed(compression, path, data, dest="test"):
392393
elif compression == "bz2":
393394
compress_method = bz2.BZ2File
394395
elif compression == "zstd":
395-
import pytest
396-
397-
zstd = pytest.importorskip("zstandard")
398-
compress_method = zstd.open
396+
compress_method = import_optional_dependency("zstandard").open
399397
elif compression == "xz":
400398
compress_method = get_lzma_file()
401399
else:

pandas/conftest.py

+17-9
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,6 @@
4444
utc,
4545
)
4646

47-
try:
48-
import zstandard as zstd # noqa: F401
49-
50-
have_zstd = True
51-
except ImportError:
52-
have_zstd = False
53-
5447
import pandas.util._test_decorators as td
5548

5649
from pandas.core.dtypes.dtypes import (
@@ -275,7 +268,14 @@ def other_closed(request):
275268

276269

277270
@pytest.fixture(
278-
params=[None, "gzip", "bz2", "zip", "xz"] + (["zstd"] if have_zstd else [])
271+
params=[
272+
None,
273+
"gzip",
274+
"bz2",
275+
"zip",
276+
"xz",
277+
pytest.param("zstd", marks=td.skip_if_no("zstandard")),
278+
]
279279
)
280280
def compression(request):
281281
"""
@@ -284,7 +284,15 @@ def compression(request):
284284
return request.param
285285

286286

287-
@pytest.fixture(params=["gzip", "bz2", "zip", "xz"] + (["zstd"] if have_zstd else []))
287+
@pytest.fixture(
288+
params=[
289+
"gzip",
290+
"bz2",
291+
"zip",
292+
"xz",
293+
pytest.param("zstd", marks=td.skip_if_no("zstandard")),
294+
]
295+
)
288296
def compression_only(request):
289297
"""
290298
Fixture for trying common compression types in compression tests excluding

pandas/core/describe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535

3636
from pandas.core.reshape.concat import concat
3737

38-
from pandas.io.formats.format import format_percentiles
3938

4039
if TYPE_CHECKING:
4140
from pandas import (
@@ -229,6 +228,7 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
229228
The percentiles to include in the output.
230229
"""
231230
from pandas import Series
231+
from pandas.io.formats.format import format_percentiles
232232

233233
# error: Argument 1 to "format_percentiles" has incompatible type "Sequence[float]";
234234
# expected "Union[ndarray, List[Union[int, float]], List[float], List[Union[str,

pandas/core/frame.py

+6-19
Original file line numberDiff line numberDiff line change
@@ -2486,7 +2486,8 @@ def _from_arrays(
24862486
)
24872487
return cls(mgr)
24882488

2489-
@doc(storage_options=generic._shared_docs["storage_options"])
2489+
@doc(storage_options=generic._shared_docs["storage_options"],
2490+
compression_options=generic._shared_docs["compression_options"] % "path")
24902491
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
24912492
def to_stata(
24922493
self,
@@ -2565,16 +2566,7 @@ def to_stata(
25652566
format. Only available if version is 117. Storing strings in the
25662567
StrL format can produce smaller dta files if strings have more than
25672568
8 characters and values are repeated.
2568-
compression : str or dict, default 'infer'
2569-
For on-the-fly compression of the output dta. If string, specifies
2570-
compression mode. If dict, value at key 'method' specifies
2571-
compression mode. Compression mode must be one of {{'infer', 'gzip',
2572-
'bz2', 'zip', 'xz', 'zstd', None}}. If compression mode is 'infer' and
2573-
`fname` is path-like, then detect compression from the following
2574-
extensions: '.gz', '.bz2', '.zip', '.xz', '.zst' (otherwise no
2575-
compression). If dict and compression mode is one of {{'zip',
2576-
'gzip', 'bz2', 'zstd'}}, or inferred as one of the above, other entries
2577-
passed as additional compression options.
2569+
{compression_options}
25782570
25792571
.. versionadded:: 1.1.0
25802572
@@ -2943,7 +2935,8 @@ def to_html(
29432935
render_links=render_links,
29442936
)
29452937

2946-
@doc(storage_options=generic._shared_docs["storage_options"])
2938+
@doc(storage_options=generic._shared_docs["storage_options"],
2939+
compression_options=generic._shared_docs["compression_options"] % "path_or_buffer")
29472940
def to_xml(
29482941
self,
29492942
path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
@@ -3020,13 +3013,7 @@ def to_xml(
30203013
layout of elements and attributes from original output. This
30213014
argument requires ``lxml`` to be installed. Only XSLT 1.0
30223015
scripts and not later versions is currently supported.
3023-
compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}},
3024-
default 'infer'.
3025-
For on-the-fly decompression of on-disk data. If 'infer', then use
3026-
gzip, bz2, zip, xz, or zstandard if path_or_buffer is a string ending in
3027-
'.gz', '.bz2', '.zip', '.xz', '.zst' respectively, and no decompression
3028-
otherwise. If using 'zip', the ZIP file must contain only one data
3029-
file to be read in. Set to None for no decompression.
3016+
{compression_options}
30303017
{storage_options}
30313018
30323019
Returns

pandas/core/shared_docs.py

+29
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,35 @@
402402
starting with "s3://", and "gcs://") the key-value pairs are forwarded to
403403
``fsspec``. Please see ``fsspec`` and ``urllib`` for more details."""
404404

405+
_shared_docs[
406+
"compression_options"
407+
] = """compression : str or dict, default 'infer'
408+
For on-the-fly compression of the output data. If 'infer' and '%s' is
409+
path-like, then detect compression from the following extensions: '.gz',
410+
'.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). Set to
411+
``None`` for no compression. Can also be a dict with key ``'method'`` set
412+
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
413+
key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
414+
``bz2.BZ2File``, or ``zstandard.ZstdCompressor``, respectively. As an
415+
example, the following could be passed for faster compression and to create
416+
a reproducible gzip archive: ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
417+
"""
418+
419+
_shared_docs[
420+
"decompression_options"
421+
] = """compression : str or dict, default 'infer'
422+
For on-the-fly decompression of on-disk data. If 'infer' and '%s' is
423+
path-like, then detect compression from the following extensions: '.gz',
424+
'.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using
425+
'zip', the ZIP file must contain only one data file to be read in. Set to
426+
``None`` for no decompression. Can also be a dict with key ``'method'`` set
427+
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
428+
key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
429+
``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
430+
example, the following could be passed for Zstandard decompression using a
431+
custom compression dictionary: ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
432+
"""
433+
405434
_shared_docs[
406435
"replace"
407436
] = """

pandas/io/common.py

+27-31
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import warnings
4141
import zipfile
4242

43+
from pandas.util._decorators import doc
4344
from pandas._typing import (
4445
BaseBuffer,
4546
CompressionDict,
@@ -54,22 +55,13 @@
5455
from pandas.util._exceptions import find_stack_level
5556

5657
from pandas.core.dtypes.common import is_file_like
58+
from pandas.core import generic
5759

5860
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
5961
_VALID_URLS.discard("")
6062

6163
BaseBufferT = TypeVar("BaseBufferT", bound=BaseBuffer)
6264

63-
# For the _is_binary_mode, we need to get python-zstandard's reader class because
64-
# it doesn't use any of the builtin base classes (such as RawIOBase).
65-
# Unfortunately python-zstandard doesn't expose that particular class, so we have
66-
# to get it through `zstd.open`.
67-
try:
68-
with import_optional_dependency("zstandard").open(io.BytesIO()) as reader:
69-
_ZstdDecompressorReader: type | None = type(reader)
70-
except ImportError:
71-
_ZstdDecompressorReader = None
72-
7365

7466
@dataclasses.dataclass
7567
class IOArgs:
@@ -257,6 +249,7 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
257249
)
258250

259251

252+
@doc(compression_options=generic._shared_docs["compression_options"] % "filepath_or_buffer")
260253
def _get_filepath_or_buffer(
261254
filepath_or_buffer: FilePath | BaseBuffer,
262255
encoding: str = "utf-8",
@@ -272,7 +265,7 @@ def _get_filepath_or_buffer(
272265
----------
273266
filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
274267
or buffer
275-
compression : {{'gzip', 'bz2', 'zip', 'xz', 'zstd', None}}, optional
268+
{compression_options}
276269
encoding : the encoding to use to decode bytes, default is 'utf-8'
277270
mode : str, optional
278271
@@ -499,6 +492,7 @@ def get_compression_method(
499492
return compression_method, compression_args
500493

501494

495+
@doc(compression_options=generic._shared_docs["compression_options"] % "filepath_or_buffer")
502496
def infer_compression(
503497
filepath_or_buffer: FilePath | BaseBuffer, compression: str | None
504498
) -> str | None:
@@ -512,10 +506,7 @@ def infer_compression(
512506
----------
513507
filepath_or_buffer : str or file handle
514508
File path or object.
515-
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
516-
If 'infer' and `filepath_or_buffer` is path-like, then detect
517-
compression from the following extensions: '.gz', '.bz2', '.zip',
518-
'.xz', or '.zst' (otherwise no compression).
509+
{compression_options}
519510
520511
Returns
521512
-------
@@ -603,6 +594,7 @@ def get_handle(
603594
...
604595

605596

597+
@doc(compression_options=generic._shared_docs["compression_options"] % "path_or_buf")
606598
def get_handle(
607599
path_or_buf: FilePath | BaseBuffer,
608600
mode: str,
@@ -625,15 +617,7 @@ def get_handle(
625617
Mode to open path_or_buf with.
626618
encoding : str or None
627619
Encoding to use.
628-
compression : str or dict, default None
629-
If string, specifies compression mode. If dict, value at key 'method'
630-
specifies compression mode. Compression mode must be one of {'infer',
631-
'gzip', 'bz2', 'zip', 'xz', 'zstd', None}. If compression mode is
632-
'infer' and `filepath_or_buffer` is path-like, then detect compression
633-
from the following extensions: '.gz', '.bz2', '.zip', '.xz', or '.zst'
634-
(otherwise no compression). If dict and compression mode is one of
635-
{'zip', 'gzip', 'bz2', 'zstd'}, or inferred as one of the above,
636-
other entries passed as additional compression options.
620+
{compression_options}
637621
638622
.. versionchanged:: 1.0.0
639623
@@ -1117,11 +1101,23 @@ def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool:
11171101
if issubclass(type(handle), text_classes):
11181102
return False
11191103

1120-
# classes that expect bytes
1121-
binary_classes: list[type] = [BufferedIOBase, RawIOBase]
1122-
# Zstandard doesn't use any of the builtin base classes
1123-
if _ZstdDecompressorReader is not None:
1124-
binary_classes.append(_ZstdDecompressorReader)
1125-
is_binary_class = isinstance(handle, tuple(binary_classes))
1104+
return isinstance(handle, _get_binary_io_classes()) or "b" in getattr(
1105+
handle, "mode", mode
1106+
)
1107+
1108+
1109+
def _get_binary_io_classes() -> tuple[type]:
1110+
"""IO classes that expect bytes"""
1111+
binary_classes: tuple[type] = (BufferedIOBase, RawIOBase)
1112+
1113+
# python-zstandard doesn't use any of the builtin base classes; instead we
1114+
# have to use the `zstd.ZstdDecompressionReader` class for isinstance checks.
1115+
# Unfortunately `zstd.ZstdDecompressionReader` isn't exposed by python-zstandard
1116+
# so we have to get it from a `zstd.ZstdDecompressor` instance.
1117+
# See also https://github.com/indygreg/python-zstandard/pull/165.
1118+
zstd = import_optional_dependency("zstandard", errors="ignore")
1119+
if zstd is not None:
1120+
with zstd.ZstdDecompressor().stream_reader(b"") as reader:
1121+
binary_classes += (type(reader),)
11261122

1127-
return is_binary_class or "b" in getattr(handle, "mode", mode)
1123+
return binary_classes

pandas/io/formats/xml.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,13 @@
2525
get_data_from_filepath,
2626
preprocess_data,
2727
)
28+
from pandas.util._decorators import doc
2829

30+
from pandas.core import generic
2931

32+
33+
34+
@doc(compression_options=generic._shared_docs["compression_options"] % "path_or_buffer")
3035
class BaseXMLFormatter:
3136
"""
3237
Subclass for formatting data in XML.
@@ -74,9 +79,7 @@ class BaseXMLFormatter:
7479
stylesheet : str or file-like
7580
A URL, file, file-like object, or a raw string containing XSLT.
7681
77-
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}, default 'infer'
78-
Compression type for on-the-fly decompression of on-disk data.
79-
If 'infer', then use extension for gzip, bz2, zip, xz, or zstandard.
82+
{compression_options}
8083
8184
storage_options : dict, optional
8285
Extra options that make sense for a particular storage connection,

pandas/io/json/_json.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464
parse_table_schema,
6565
)
6666
from pandas.io.parsers.readers import validate_integer
67+
from pandas.util._decorators import doc
68+
69+
from pandas.core import generic
6770

6871
loads = json.loads
6972
dumps = json.dumps
@@ -314,7 +317,8 @@ def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
314317
return {"schema": self.schema, "data": self.obj}
315318

316319

317-
@doc(storage_options=generic._shared_docs["storage_options"])
320+
@doc(storage_options=generic._shared_docs["storage_options"],
321+
decompression_options=generic._shared_docs["decompression_options"] % "path_or_buf")
318322
@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
319323
@deprecate_nonkeyword_arguments(
320324
version="2.0", allowed_args=["path_or_buf"], stacklevel=3
@@ -475,12 +479,7 @@ def read_json(
475479
476480
``JsonReader`` is a context manager.
477481
478-
compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}}, default 'infer'
479-
For on-the-fly decompression of on-disk data. If 'infer', then use
480-
gzip, bz2, zip, xz, zstandard if path_or_buf is a string ending in
481-
'.gz', '.bz2', '.zip', '.xz', or '.zst' respectively, and no decompression
482-
otherwise. If using 'zip', the ZIP file must contain only one data
483-
file to be read in. Set to None for no decompression.
482+
{decompression_options}
484483
485484
nrows : int, optional
486485
The number of lines from the line-delimited jsonfile that has to be read.

0 commit comments

Comments
 (0)