Skip to content

REF: de-privatize io.common functions used elsewhere #30368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 2 commits on Dec 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __next__(self):
raise AbstractMethodError(self)


def _is_url(url) -> bool:
def is_url(url) -> bool:
"""
Check to see if a URL has a valid protocol.

Expand Down Expand Up @@ -102,7 +102,7 @@ def _expand_user(
return filepath_or_buffer


def _validate_header_arg(header) -> None:
def validate_header_arg(header) -> None:
if isinstance(header, bool):
raise TypeError(
"Passing a bool to header is invalid. "
Expand All @@ -112,7 +112,7 @@ def _validate_header_arg(header) -> None:
)


def _stringify_path(
def stringify_path(
filepath_or_buffer: FilePathOrBuffer[AnyStr],
) -> FilePathOrBuffer[AnyStr]:
"""Attempt to convert a path-like object to a string.
Expand Down Expand Up @@ -193,9 +193,9 @@ def get_filepath_or_buffer(
compression, str,
should_close, bool)
"""
filepath_or_buffer = _stringify_path(filepath_or_buffer)
filepath_or_buffer = stringify_path(filepath_or_buffer)

if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
req = urlopen(filepath_or_buffer)
content_encoding = req.headers.get("Content-Encoding", None)
if content_encoding == "gzip":
Expand Down Expand Up @@ -250,7 +250,7 @@ def file_path_to_url(path: str) -> str:
_compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"}


def _get_compression_method(
def get_compression_method(
compression: Optional[Union[str, Mapping[str, str]]]
) -> Tuple[Optional[str], Dict[str, str]]:
"""
Expand Down Expand Up @@ -283,7 +283,7 @@ def _get_compression_method(
return compression, compression_args


def _infer_compression(
def infer_compression(
filepath_or_buffer: FilePathOrBuffer, compression: Optional[str]
) -> Optional[str]:
"""
Expand Down Expand Up @@ -317,7 +317,7 @@ def _infer_compression(
# Infer compression
if compression == "infer":
# Convert all path types (e.g. pathlib.Path) to strings
filepath_or_buffer = _stringify_path(filepath_or_buffer)
filepath_or_buffer = stringify_path(filepath_or_buffer)
if not isinstance(filepath_or_buffer, str):
# Cannot infer compression of a buffer, assume no compression
return None
Expand All @@ -338,7 +338,7 @@ def _infer_compression(
raise ValueError(msg)


def _get_handle(
def get_handle(
path_or_buf,
mode: str,
encoding=None,
Expand Down Expand Up @@ -396,12 +396,12 @@ def _get_handle(
f = path_or_buf

# Convert pathlib.Path/py.path.local or string
path_or_buf = _stringify_path(path_or_buf)
path_or_buf = stringify_path(path_or_buf)
is_path = isinstance(path_or_buf, str)

compression, compression_args = _get_compression_method(compression)
compression, compression_args = get_compression_method(compression)
if is_path:
compression = _infer_compression(path_or_buf, compression)
compression = infer_compression(path_or_buf, compression)

if compression:

Expand All @@ -421,7 +421,7 @@ def _get_handle(

# ZIP Compression
elif compression == "zip":
zf = BytesZipFile(path_or_buf, mode, **compression_args)
zf = _BytesZipFile(path_or_buf, mode, **compression_args)
# Ensure the container is closed as well.
handles.append(zf)
if zf.mode == "w":
Expand Down Expand Up @@ -472,7 +472,7 @@ def _get_handle(

if memory_map and hasattr(f, "fileno"):
try:
wrapped = MMapWrapper(f)
wrapped = _MMapWrapper(f)
f.close()
f = wrapped
except Exception:
Expand All @@ -485,7 +485,7 @@ def _get_handle(
return f, handles


class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore
class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore
"""
Wrapper for standard library class ZipFile and allow the returned file-like
handle to accept byte strings via `write` method.
Expand Down Expand Up @@ -518,7 +518,7 @@ def closed(self):
return self.fp is None


class MMapWrapper(BaseIterator):
class _MMapWrapper(BaseIterator):
"""
Wrapper for the Python's mmap class so that it can be properly read in
by Python's csv.reader class.
Expand All @@ -537,7 +537,7 @@ def __init__(self, f: IO):
def __getattr__(self, name: str):
return getattr(self.mmap, name)

def __iter__(self) -> "MMapWrapper":
def __iter__(self) -> "_MMapWrapper":
return self

def __next__(self) -> str:
Expand Down
14 changes: 7 additions & 7 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
from pandas.core.frame import DataFrame

from pandas.io.common import (
_is_url,
_stringify_path,
_validate_header_arg,
get_filepath_or_buffer,
is_url,
stringify_path,
urlopen,
validate_header_arg,
)
from pandas.io.excel._util import (
_fill_mi_header,
Expand Down Expand Up @@ -339,7 +339,7 @@ def read_excel(
class _BaseExcelReader(metaclass=abc.ABCMeta):
def __init__(self, filepath_or_buffer):
# If filepath_or_buffer is a url, load the data into a BytesIO
if _is_url(filepath_or_buffer):
if is_url(filepath_or_buffer):
filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)
Expand Down Expand Up @@ -408,7 +408,7 @@ def parse(
**kwds,
):

_validate_header_arg(header)
validate_header_arg(header)

ret_dict = False

Expand Down Expand Up @@ -708,7 +708,7 @@ def __init__(
self.mode = mode

def __fspath__(self):
return _stringify_path(self.path)
return stringify_path(self.path)

def _get_sheet_name(self, sheet_name):
if sheet_name is None:
Expand Down Expand Up @@ -808,7 +808,7 @@ def __init__(self, io, engine=None):
# could be a str, ExcelFile, Book, etc.
self.io = io
# Always a string
self._io = _stringify_path(io)
self._io = stringify_path(io)

self._reader = self._engines[engine](self._io)

Expand Down
6 changes: 3 additions & 3 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from pandas import DataFrame, Int64Index, RangeIndex

from pandas.io.common import _stringify_path
from pandas.io.common import stringify_path


def to_feather(df: DataFrame, path):
Expand All @@ -20,7 +20,7 @@ def to_feather(df: DataFrame, path):
import_optional_dependency("pyarrow")
from pyarrow import feather

path = _stringify_path(path)
path = stringify_path(path)

if not isinstance(df, DataFrame):
raise ValueError("feather only support IO with DataFrames")
Expand Down Expand Up @@ -98,6 +98,6 @@ def read_feather(path, columns=None, use_threads: bool = True):
import_optional_dependency("pyarrow")
from pyarrow import feather

path = _stringify_path(path)
path = stringify_path(path)

return feather.read_feather(path, columns=columns, use_threads=bool(use_threads))
14 changes: 7 additions & 7 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@

from pandas.io.common import (
UnicodeWriter,
_get_compression_method,
_get_handle,
_infer_compression,
get_compression_method,
get_filepath_or_buffer,
get_handle,
infer_compression,
)


Expand Down Expand Up @@ -61,7 +61,7 @@ def __init__(
path_or_buf = StringIO()

# Extract compression mode as given, if dict
compression, self.compression_args = _get_compression_method(compression)
compression, self.compression_args = get_compression_method(compression)

self.path_or_buf, _, _, _ = get_filepath_or_buffer(
path_or_buf, encoding=encoding, compression=compression, mode=mode
Expand All @@ -78,7 +78,7 @@ def __init__(
if encoding is None:
encoding = "utf-8"
self.encoding = encoding
self.compression = _infer_compression(self.path_or_buf, compression)
self.compression = infer_compression(self.path_or_buf, compression)

if quoting is None:
quoting = csvlib.QUOTE_MINIMAL
Expand Down Expand Up @@ -179,7 +179,7 @@ def save(self):
f = self.path_or_buf
close = False
else:
f, handles = _get_handle(
f, handles = get_handle(
self.path_or_buf,
self.mode,
encoding=self.encoding,
Expand Down Expand Up @@ -212,7 +212,7 @@ def save(self):
else:
compression = dict(self.compression_args, method=self.compression)

f, handles = _get_handle(
f, handles = get_handle(
self.path_or_buf,
self.mode,
encoding=self.encoding,
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas import Index
import pandas.core.common as com

from pandas.io.common import stringify_path
from pandas.io.formats.css import CSSResolver, CSSWarning
from pandas.io.formats.format import get_level_lengths
from pandas.io.formats.printing import pprint_thing
Expand Down Expand Up @@ -711,7 +712,6 @@ def write(
and ``io.excel.xlsm.writer``.
"""
from pandas.io.excel import ExcelWriter
from pandas.io.common import _stringify_path

num_rows, num_cols = self.df.shape
if num_rows > self.max_rows or num_cols > self.max_cols:
Expand All @@ -724,7 +724,7 @@ def write(
if isinstance(writer, ExcelWriter):
need_save = False
else:
writer = ExcelWriter(_stringify_path(writer), engine=engine)
writer = ExcelWriter(stringify_path(writer), engine=engine)
need_save = True

formatted_cells = self.get_formatted_cells()
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex

from pandas.io.common import _stringify_path
from pandas.io.common import stringify_path
from pandas.io.formats.printing import adjoin, justify, pprint_thing

if TYPE_CHECKING:
Expand Down Expand Up @@ -482,7 +482,7 @@ def get_buffer(
objects, otherwise yield buf unchanged.
"""
if buf is not None:
buf = _stringify_path(buf)
buf = stringify_path(buf)
else:
buf = StringIO()

Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from pandas import option_context

from pandas.io.common import _is_url
from pandas.io.common import is_url
from pandas.io.formats.format import (
DataFrameFormatter,
TableFormatter,
Expand Down Expand Up @@ -147,7 +147,7 @@ def _write_cell(

rs = pprint_thing(s, escape_chars=esc).strip()

if self.render_links and _is_url(rs):
if self.render_links and is_url(rs):
rs_unescaped = pprint_thing(s, escape_chars={}).strip()
start_tag += '<a href="{url}" target="_blank">'.format(url=rs_unescaped)
end_a = "</a>"
Expand Down
10 changes: 5 additions & 5 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from pandas.core.construction import create_series_with_explicit_dtype

from pandas.io.common import _is_url, _validate_header_arg, urlopen
from pandas.io.common import is_url, urlopen, validate_header_arg
from pandas.io.formats.printing import pprint_thing
from pandas.io.parsers import TextParser

Expand Down Expand Up @@ -117,7 +117,7 @@ def _read(obj):
-------
raw_text : str
"""
if _is_url(obj):
if is_url(obj):
with urlopen(obj) as url:
text = url.read()
elif hasattr(obj, "read"):
Expand Down Expand Up @@ -705,7 +705,7 @@ def _build_doc(self):
parser = HTMLParser(recover=True, encoding=self.encoding)

try:
if _is_url(self.io):
if is_url(self.io):
with urlopen(self.io) as f:
r = parse(f, parser=parser)
else:
Expand All @@ -717,7 +717,7 @@ def _build_doc(self):
pass
except (UnicodeDecodeError, IOError) as e:
# if the input is a blob of html goop
if not _is_url(self.io):
if not is_url(self.io):
r = fromstring(self.io, parser=parser)

try:
Expand Down Expand Up @@ -1076,7 +1076,7 @@ def read_html(
"cannot skip rows starting from the end of the "
"data (you passed a negative value)"
)
_validate_header_arg(header)
validate_header_arg(header)
return _parse(
flavor=flavor,
io=io,
Expand Down
Loading