40
40
import warnings
41
41
import zipfile
42
42
43
+ from pandas .util ._decorators import doc
43
44
from pandas ._typing import (
44
45
BaseBuffer ,
45
46
CompressionDict ,
54
55
from pandas .util ._exceptions import find_stack_level
55
56
56
57
from pandas .core .dtypes .common import is_file_like
58
+ from pandas .core import generic
57
59
58
60
_VALID_URLS = set (uses_relative + uses_netloc + uses_params )
59
61
_VALID_URLS .discard ("" )
60
62
61
63
BaseBufferT = TypeVar ("BaseBufferT" , bound = BaseBuffer )
62
64
63
- # For the _is_binary_mode, we need to get python-zstandard's reader class because
64
- # it doesn't use any of the builtin base classes (such as RawIOBase).
65
- # Unfortunately python-zstandard doesn't expose that particular class, so we have
66
- # to get it through `zstd.open`.
67
- try :
68
- with import_optional_dependency ("zstandard" ).open (io .BytesIO ()) as reader :
69
- _ZstdDecompressorReader : type | None = type (reader )
70
- except ImportError :
71
- _ZstdDecompressorReader = None
72
-
73
65
74
66
@dataclasses .dataclass
75
67
class IOArgs :
@@ -257,6 +249,7 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
257
249
)
258
250
259
251
252
+ @doc (compression_options = generic ._shared_docs ["compression_options" ] % "filepath_or_buffer" )
260
253
def _get_filepath_or_buffer (
261
254
filepath_or_buffer : FilePath | BaseBuffer ,
262
255
encoding : str = "utf-8" ,
@@ -272,7 +265,7 @@ def _get_filepath_or_buffer(
272
265
----------
273
266
filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
274
267
or buffer
275
- compression : {{'gzip', 'bz2', 'zip', 'xz', 'zstd', None}}, optional
268
+ {compression_options}
276
269
encoding : the encoding to use to decode bytes, default is 'utf-8'
277
270
mode : str, optional
278
271
@@ -499,6 +492,7 @@ def get_compression_method(
499
492
return compression_method , compression_args
500
493
501
494
495
+ @doc (compression_options = generic ._shared_docs ["compression_options" ] % "filepath_or_buffer" )
502
496
def infer_compression (
503
497
filepath_or_buffer : FilePath | BaseBuffer , compression : str | None
504
498
) -> str | None :
@@ -512,10 +506,7 @@ def infer_compression(
512
506
----------
513
507
filepath_or_buffer : str or file handle
514
508
File path or object.
515
- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
516
- If 'infer' and `filepath_or_buffer` is path-like, then detect
517
- compression from the following extensions: '.gz', '.bz2', '.zip',
518
- '.xz', or '.zst' (otherwise no compression).
509
+ {compression_options}
519
510
520
511
Returns
521
512
-------
@@ -603,6 +594,7 @@ def get_handle(
603
594
...
604
595
605
596
597
+ @doc (compression_options = generic ._shared_docs ["compression_options" ] % "path_or_buf" )
606
598
def get_handle (
607
599
path_or_buf : FilePath | BaseBuffer ,
608
600
mode : str ,
@@ -625,15 +617,7 @@ def get_handle(
625
617
Mode to open path_or_buf with.
626
618
encoding : str or None
627
619
Encoding to use.
628
- compression : str or dict, default None
629
- If string, specifies compression mode. If dict, value at key 'method'
630
- specifies compression mode. Compression mode must be one of {'infer',
631
- 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}. If compression mode is
632
- 'infer' and `filepath_or_buffer` is path-like, then detect compression
633
- from the following extensions: '.gz', '.bz2', '.zip', '.xz', or '.zst'
634
- (otherwise no compression). If dict and compression mode is one of
635
- {'zip', 'gzip', 'bz2', 'zstd'}, or inferred as one of the above,
636
- other entries passed as additional compression options.
620
+ {compression_options}
637
621
638
622
.. versionchanged:: 1.0.0
639
623
@@ -1117,11 +1101,23 @@ def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool:
1117
1101
if issubclass (type (handle ), text_classes ):
1118
1102
return False
1119
1103
1120
- # classes that expect bytes
1121
- binary_classes : list [type ] = [BufferedIOBase , RawIOBase ]
1122
- # Zstandard doesn't use any of the builtin base classes
1123
- if _ZstdDecompressorReader is not None :
1124
- binary_classes .append (_ZstdDecompressorReader )
1125
- is_binary_class = isinstance (handle , tuple (binary_classes ))
1104
+ return isinstance (handle , _get_binary_io_classes ()) or "b" in getattr (
1105
+ handle , "mode" , mode
1106
+ )
1107
+
1108
+
1109
+ def _get_binary_io_classes () -> tuple [type ]:
1110
+ """IO classes that that expect bytes"""
1111
+ binary_classes : tuple [type ] = (BufferedIOBase , RawIOBase )
1112
+
1113
+ # python-zstandard doesn't use any of the builtin base classes; instead we
1114
+ # have to use the `zstd.ZstdDecompressionReader` class for isinstance checks.
1115
+ # Unfortunately `zstd.ZstdDecompressionReader` isn't exposed by python-zstandard
1116
+ # so we have to get it from a `zstd.ZstdDecompressor` instance.
1117
+ # See also https://github.com/indygreg/python-zstandard/pull/165.
1118
+ zstd = import_optional_dependency ("zstandard" , errors = "ignore" )
1119
+ if zstd is not None :
1120
+ with zstd .ZstdDecompressor ().stream_reader (b"" ) as reader :
1121
+ binary_classes += (type (reader ),)
1126
1122
1127
- return is_binary_class or "b" in getattr ( handle , "mode" , mode )
1123
+ return binary_classes
0 commit comments