Skip to content

Commit 0df8858

Browse files
authored
de-privatize io.common functions (#30368)
1 parent 6efc237 commit 0df8858

18 files changed, with 89 additions (+89) and 89 deletions (-89).

pandas/io/common.py

+17-17
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ def __next__(self):
6464
raise AbstractMethodError(self)
6565

6666

67-
def _is_url(url) -> bool:
67+
def is_url(url) -> bool:
6868
"""
6969
Check to see if a URL has a valid protocol.
7070
@@ -102,7 +102,7 @@ def _expand_user(
102102
return filepath_or_buffer
103103

104104

105-
def _validate_header_arg(header) -> None:
105+
def validate_header_arg(header) -> None:
106106
if isinstance(header, bool):
107107
raise TypeError(
108108
"Passing a bool to header is invalid. "
@@ -112,7 +112,7 @@ def _validate_header_arg(header) -> None:
112112
)
113113

114114

115-
def _stringify_path(
115+
def stringify_path(
116116
filepath_or_buffer: FilePathOrBuffer[AnyStr],
117117
) -> FilePathOrBuffer[AnyStr]:
118118
"""Attempt to convert a path-like object to a string.
@@ -193,9 +193,9 @@ def get_filepath_or_buffer(
193193
compression, str,
194194
should_close, bool)
195195
"""
196-
filepath_or_buffer = _stringify_path(filepath_or_buffer)
196+
filepath_or_buffer = stringify_path(filepath_or_buffer)
197197

198-
if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
198+
if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
199199
req = urlopen(filepath_or_buffer)
200200
content_encoding = req.headers.get("Content-Encoding", None)
201201
if content_encoding == "gzip":
@@ -250,7 +250,7 @@ def file_path_to_url(path: str) -> str:
250250
_compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"}
251251

252252

253-
def _get_compression_method(
253+
def get_compression_method(
254254
compression: Optional[Union[str, Mapping[str, str]]]
255255
) -> Tuple[Optional[str], Dict[str, str]]:
256256
"""
@@ -283,7 +283,7 @@ def _get_compression_method(
283283
return compression, compression_args
284284

285285

286-
def _infer_compression(
286+
def infer_compression(
287287
filepath_or_buffer: FilePathOrBuffer, compression: Optional[str]
288288
) -> Optional[str]:
289289
"""
@@ -317,7 +317,7 @@ def _infer_compression(
317317
# Infer compression
318318
if compression == "infer":
319319
# Convert all path types (e.g. pathlib.Path) to strings
320-
filepath_or_buffer = _stringify_path(filepath_or_buffer)
320+
filepath_or_buffer = stringify_path(filepath_or_buffer)
321321
if not isinstance(filepath_or_buffer, str):
322322
# Cannot infer compression of a buffer, assume no compression
323323
return None
@@ -338,7 +338,7 @@ def _infer_compression(
338338
raise ValueError(msg)
339339

340340

341-
def _get_handle(
341+
def get_handle(
342342
path_or_buf,
343343
mode: str,
344344
encoding=None,
@@ -396,12 +396,12 @@ def _get_handle(
396396
f = path_or_buf
397397

398398
# Convert pathlib.Path/py.path.local or string
399-
path_or_buf = _stringify_path(path_or_buf)
399+
path_or_buf = stringify_path(path_or_buf)
400400
is_path = isinstance(path_or_buf, str)
401401

402-
compression, compression_args = _get_compression_method(compression)
402+
compression, compression_args = get_compression_method(compression)
403403
if is_path:
404-
compression = _infer_compression(path_or_buf, compression)
404+
compression = infer_compression(path_or_buf, compression)
405405

406406
if compression:
407407

@@ -421,7 +421,7 @@ def _get_handle(
421421

422422
# ZIP Compression
423423
elif compression == "zip":
424-
zf = BytesZipFile(path_or_buf, mode, **compression_args)
424+
zf = _BytesZipFile(path_or_buf, mode, **compression_args)
425425
# Ensure the container is closed as well.
426426
handles.append(zf)
427427
if zf.mode == "w":
@@ -472,7 +472,7 @@ def _get_handle(
472472

473473
if memory_map and hasattr(f, "fileno"):
474474
try:
475-
wrapped = MMapWrapper(f)
475+
wrapped = _MMapWrapper(f)
476476
f.close()
477477
f = wrapped
478478
except Exception:
@@ -485,7 +485,7 @@ def _get_handle(
485485
return f, handles
486486

487487

488-
class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore
488+
class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore
489489
"""
490490
Wrapper for standard library class ZipFile and allow the returned file-like
491491
handle to accept byte strings via `write` method.
@@ -518,7 +518,7 @@ def closed(self):
518518
return self.fp is None
519519

520520

521-
class MMapWrapper(BaseIterator):
521+
class _MMapWrapper(BaseIterator):
522522
"""
523523
Wrapper for the Python's mmap class so that it can be properly read in
524524
by Python's csv.reader class.
@@ -537,7 +537,7 @@ def __init__(self, f: IO):
537537
def __getattr__(self, name: str):
538538
return getattr(self.mmap, name)
539539

540-
def __iter__(self) -> "MMapWrapper":
540+
def __iter__(self) -> "_MMapWrapper":
541541
return self
542542

543543
def __next__(self) -> str:

pandas/io/excel/_base.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@
1515
from pandas.core.frame import DataFrame
1616

1717
from pandas.io.common import (
18-
_is_url,
19-
_stringify_path,
20-
_validate_header_arg,
2118
get_filepath_or_buffer,
19+
is_url,
20+
stringify_path,
2221
urlopen,
22+
validate_header_arg,
2323
)
2424
from pandas.io.excel._util import (
2525
_fill_mi_header,
@@ -339,7 +339,7 @@ def read_excel(
339339
class _BaseExcelReader(metaclass=abc.ABCMeta):
340340
def __init__(self, filepath_or_buffer):
341341
# If filepath_or_buffer is a url, load the data into a BytesIO
342-
if _is_url(filepath_or_buffer):
342+
if is_url(filepath_or_buffer):
343343
filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
344344
elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
345345
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)
@@ -408,7 +408,7 @@ def parse(
408408
**kwds,
409409
):
410410

411-
_validate_header_arg(header)
411+
validate_header_arg(header)
412412

413413
ret_dict = False
414414

@@ -708,7 +708,7 @@ def __init__(
708708
self.mode = mode
709709

710710
def __fspath__(self):
711-
return _stringify_path(self.path)
711+
return stringify_path(self.path)
712712

713713
def _get_sheet_name(self, sheet_name):
714714
if sheet_name is None:
@@ -808,7 +808,7 @@ def __init__(self, io, engine=None):
808808
# could be a str, ExcelFile, Book, etc.
809809
self.io = io
810810
# Always a string
811-
self._io = _stringify_path(io)
811+
self._io = stringify_path(io)
812812

813813
self._reader = self._engines[engine](self._io)
814814

pandas/io/feather_format.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
from pandas import DataFrame, Int64Index, RangeIndex
66

7-
from pandas.io.common import _stringify_path
7+
from pandas.io.common import stringify_path
88

99

1010
def to_feather(df: DataFrame, path):
@@ -20,7 +20,7 @@ def to_feather(df: DataFrame, path):
2020
import_optional_dependency("pyarrow")
2121
from pyarrow import feather
2222

23-
path = _stringify_path(path)
23+
path = stringify_path(path)
2424

2525
if not isinstance(df, DataFrame):
2626
raise ValueError("feather only support IO with DataFrames")
@@ -98,6 +98,6 @@ def read_feather(path, columns=None, use_threads: bool = True):
9898
import_optional_dependency("pyarrow")
9999
from pyarrow import feather
100100

101-
path = _stringify_path(path)
101+
path = stringify_path(path)
102102

103103
return feather.read_feather(path, columns=columns, use_threads=bool(use_threads))

pandas/io/formats/csvs.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,10 @@
2323

2424
from pandas.io.common import (
2525
UnicodeWriter,
26-
_get_compression_method,
27-
_get_handle,
28-
_infer_compression,
26+
get_compression_method,
2927
get_filepath_or_buffer,
28+
get_handle,
29+
infer_compression,
3030
)
3131

3232

@@ -61,7 +61,7 @@ def __init__(
6161
path_or_buf = StringIO()
6262

6363
# Extract compression mode as given, if dict
64-
compression, self.compression_args = _get_compression_method(compression)
64+
compression, self.compression_args = get_compression_method(compression)
6565

6666
self.path_or_buf, _, _, _ = get_filepath_or_buffer(
6767
path_or_buf, encoding=encoding, compression=compression, mode=mode
@@ -78,7 +78,7 @@ def __init__(
7878
if encoding is None:
7979
encoding = "utf-8"
8080
self.encoding = encoding
81-
self.compression = _infer_compression(self.path_or_buf, compression)
81+
self.compression = infer_compression(self.path_or_buf, compression)
8282

8383
if quoting is None:
8484
quoting = csvlib.QUOTE_MINIMAL
@@ -179,7 +179,7 @@ def save(self):
179179
f = self.path_or_buf
180180
close = False
181181
else:
182-
f, handles = _get_handle(
182+
f, handles = get_handle(
183183
self.path_or_buf,
184184
self.mode,
185185
encoding=self.encoding,
@@ -212,7 +212,7 @@ def save(self):
212212
else:
213213
compression = dict(self.compression_args, method=self.compression)
214214

215-
f, handles = _get_handle(
215+
f, handles = get_handle(
216216
self.path_or_buf,
217217
self.mode,
218218
encoding=self.encoding,

pandas/io/formats/excel.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
from pandas import Index
1616
import pandas.core.common as com
1717

18+
from pandas.io.common import stringify_path
1819
from pandas.io.formats.css import CSSResolver, CSSWarning
1920
from pandas.io.formats.format import get_level_lengths
2021
from pandas.io.formats.printing import pprint_thing
@@ -711,7 +712,6 @@ def write(
711712
and ``io.excel.xlsm.writer``.
712713
"""
713714
from pandas.io.excel import ExcelWriter
714-
from pandas.io.common import _stringify_path
715715

716716
num_rows, num_cols = self.df.shape
717717
if num_rows > self.max_rows or num_cols > self.max_cols:
@@ -724,7 +724,7 @@ def write(
724724
if isinstance(writer, ExcelWriter):
725725
need_save = False
726726
else:
727-
writer = ExcelWriter(_stringify_path(writer), engine=engine)
727+
writer = ExcelWriter(stringify_path(writer), engine=engine)
728728
need_save = True
729729

730730
formatted_cells = self.get_formatted_cells()

pandas/io/formats/format.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@
7272
from pandas.core.indexes.datetimes import DatetimeIndex
7373
from pandas.core.indexes.timedeltas import TimedeltaIndex
7474

75-
from pandas.io.common import _stringify_path
75+
from pandas.io.common import stringify_path
7676
from pandas.io.formats.printing import adjoin, justify, pprint_thing
7777

7878
if TYPE_CHECKING:
@@ -482,7 +482,7 @@ def get_buffer(
482482
objects, otherwise yield buf unchanged.
483483
"""
484484
if buf is not None:
485-
buf = _stringify_path(buf)
485+
buf = stringify_path(buf)
486486
else:
487487
buf = StringIO()
488488

pandas/io/formats/html.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
from pandas import option_context
1414

15-
from pandas.io.common import _is_url
15+
from pandas.io.common import is_url
1616
from pandas.io.formats.format import (
1717
DataFrameFormatter,
1818
TableFormatter,
@@ -147,7 +147,7 @@ def _write_cell(
147147

148148
rs = pprint_thing(s, escape_chars=esc).strip()
149149

150-
if self.render_links and _is_url(rs):
150+
if self.render_links and is_url(rs):
151151
rs_unescaped = pprint_thing(s, escape_chars={}).strip()
152152
start_tag += '<a href="{url}" target="_blank">'.format(url=rs_unescaped)
153153
end_a = "</a>"

pandas/io/html.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
from pandas.core.construction import create_series_with_explicit_dtype
1818

19-
from pandas.io.common import _is_url, _validate_header_arg, urlopen
19+
from pandas.io.common import is_url, urlopen, validate_header_arg
2020
from pandas.io.formats.printing import pprint_thing
2121
from pandas.io.parsers import TextParser
2222

@@ -117,7 +117,7 @@ def _read(obj):
117117
-------
118118
raw_text : str
119119
"""
120-
if _is_url(obj):
120+
if is_url(obj):
121121
with urlopen(obj) as url:
122122
text = url.read()
123123
elif hasattr(obj, "read"):
@@ -705,7 +705,7 @@ def _build_doc(self):
705705
parser = HTMLParser(recover=True, encoding=self.encoding)
706706

707707
try:
708-
if _is_url(self.io):
708+
if is_url(self.io):
709709
with urlopen(self.io) as f:
710710
r = parse(f, parser=parser)
711711
else:
@@ -717,7 +717,7 @@ def _build_doc(self):
717717
pass
718718
except (UnicodeDecodeError, IOError) as e:
719719
# if the input is a blob of html goop
720-
if not _is_url(self.io):
720+
if not is_url(self.io):
721721
r = fromstring(self.io, parser=parser)
722722

723723
try:
@@ -1076,7 +1076,7 @@ def read_html(
10761076
"cannot skip rows starting from the end of the "
10771077
"data (you passed a negative value)"
10781078
)
1079-
_validate_header_arg(header)
1079+
validate_header_arg(header)
10801080
return _parse(
10811081
flavor=flavor,
10821082
io=io,

0 commit comments

Comments
 (0)