diff --git a/pandas/io/common.py b/pandas/io/common.py
index 290022167e520..a505f28982aeb 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -9,6 +9,7 @@
 import mmap
 import os
 import pathlib
+from pathlib import Path
 from typing import (
     IO,
     Any,
@@ -21,6 +22,7 @@
     Tuple,
     Type,
     Union,
+    overload,
 )
 from urllib.error import URLError  # noqa
 from urllib.parse import (  # noqa
@@ -140,9 +142,33 @@ def _validate_header_arg(header) -> None:
         )
 
 
-def _stringify_path(
-    filepath_or_buffer: FilePathOrBuffer[AnyStr]
-) -> FilePathOrBuffer[AnyStr]:
+# Overload *variants* for '_stringify_path'.
+# These variants give extra information to the type checker.
+# They are ignored at runtime.
+
+
+@overload
+def _stringify_path(filepath_or_buffer: Union[str, Path]) -> str:
+    ...
+
+
+@overload
+def _stringify_path(filepath_or_buffer: IO[AnyStr]) -> IO[AnyStr]:
+    ...
+
+
+# The actual *implementation* of '_stringify_path'.
+# The implementation contains the actual runtime logic.
+#
+# It may or may not have type hints. If it does, mypy
+# will check the body of the implementation against the
+# type hints.
+#
+# Mypy will also check and make sure the signature is
+# consistent with the provided variants.
+
+
+def _stringify_path(filepath_or_buffer: FilePathOrBuffer):
     """Attempt to convert a path-like object to a string.
 
     Parameters
@@ -188,6 +214,42 @@ def is_gcs_url(url) -> bool:
         return False
 
 
+# Overload *variants* for 'get_filepath_or_buffer'.
+# These variants give extra information to the type checker.
+# They are ignored at runtime.
+
+
+@overload
+def get_filepath_or_buffer(
+    filepath_or_buffer: IO[AnyStr],
+    encoding: Optional[str] = None,
+    compression: Optional[str] = None,
+    mode: Optional[str] = None,
+) -> Tuple[IO[AnyStr], Optional[str], Optional[str], bool]:
+    ...
+
+
+@overload
+def get_filepath_or_buffer(
+    filepath_or_buffer: Union[str, Path],
+    encoding: Optional[str] = None,
+    compression: Optional[str] = None,
+    mode: Optional[str] = None,
+) -> Tuple[Union[str, IO], Optional[str], Optional[str], bool]:
+    ...
+
+
+# The actual *implementation* of 'get_filepath_or_buffer'.
+# The implementation contains the actual runtime logic.
+#
+# It may or may not have type hints. If it does, mypy
+# will check the body of the implementation against the
+# type hints.
+#
+# Mypy will also check and make sure the signature is
+# consistent with the provided variants.
+
+
 def get_filepath_or_buffer(
     filepath_or_buffer: FilePathOrBuffer,
     encoding: Optional[str] = None,
@@ -213,10 +275,10 @@ def get_filepath_or_buffer(
               compression, str,
               should_close, bool)
     """
-    filepath_or_buffer = _stringify_path(filepath_or_buffer)
+    fp_or_buf = _stringify_path(filepath_or_buffer)
 
-    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
-        req = urlopen(filepath_or_buffer)
+    if isinstance(fp_or_buf, str) and _is_url(fp_or_buf):
+        req = urlopen(fp_or_buf)
         content_encoding = req.headers.get("Content-Encoding", None)
         if content_encoding == "gzip":
             # Override compression based on Content-Encoding header
@@ -225,28 +287,28 @@ def get_filepath_or_buffer(
         req.close()
         return reader, encoding, compression, True
 
-    if is_s3_url(filepath_or_buffer):
+    if is_s3_url(fp_or_buf):
         from pandas.io import s3
 
         return s3.get_filepath_or_buffer(
-            filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
+            fp_or_buf, encoding=encoding, compression=compression, mode=mode
         )
 
-    if is_gcs_url(filepath_or_buffer):
+    if is_gcs_url(fp_or_buf):
         from pandas.io import gcs
 
         return gcs.get_filepath_or_buffer(
-            filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
+            fp_or_buf, encoding=encoding, compression=compression, mode=mode
         )
 
-    if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)):
-        return _expand_user(filepath_or_buffer), None, compression, False
+    if isinstance(fp_or_buf, (str, bytes, mmap.mmap)):
+        return _expand_user(fp_or_buf), None, compression, False
 
-    if not is_file_like(filepath_or_buffer):
+    if not is_file_like(fp_or_buf):
         msg = "Invalid file path or buffer object type: {_type}"
-        raise ValueError(msg.format(_type=type(filepath_or_buffer)))
+        raise ValueError(msg.format(_type=type(fp_or_buf)))
 
-    return filepath_or_buffer, None, compression, False
+    return fp_or_buf, None, compression, False
 
 
 def file_path_to_url(path: str) -> str:
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index a3ff837bc7f52..d790c8bb08597 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -458,7 +458,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
     finally:
         parser.close()
 
-    if should_close:
+    if should_close and not isinstance(fp_or_buf, str):
         try:
             fp_or_buf.close()
         except ValueError: