From 718884e0dcaa47a2ad6ef32a1cc6b573a18ab238 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 3 Aug 2019 20:24:00 +0100 Subject: [PATCH 1/3] TYPING: more type hints for io.common --- pandas/io/common.py | 36 +++++++++++++++++++++++++++++++----- pandas/io/parsers.py | 2 +- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index e01e473047b88..c9330acbdc7a1 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,7 +10,8 @@ import mmap import os import pathlib -from typing import IO, AnyStr, BinaryIO, Optional, TextIO, Type +from pathlib import Path +from typing import IO, AnyStr, BinaryIO, Optional, TextIO, Tuple, Type, Union, overload from urllib.error import URLError # noqa from urllib.parse import ( # noqa urlencode, @@ -126,9 +127,17 @@ def _validate_header_arg(header) -> None: ) -def _stringify_path( - filepath_or_buffer: FilePathOrBuffer[AnyStr] -) -> FilePathOrBuffer[AnyStr]: +@overload +def _stringify_path(filepath_or_buffer: Union[str, Path]) -> str: + ... + + +@overload +def _stringify_path(filepath_or_buffer: IO[AnyStr]) -> IO[AnyStr]: + ... + + +def _stringify_path(filepath_or_buffer): """Attempt to convert a path-like object to a string. Parameters @@ -174,11 +183,28 @@ def is_gcs_url(url) -> bool: return False +@overload def get_filepath_or_buffer( - filepath_or_buffer: FilePathOrBuffer, + filepath_or_buffer: IO[AnyStr], encoding: Optional[str] = None, compression: Optional[str] = None, mode: Optional[str] = None, +) -> Tuple[IO[AnyStr], Optional[str], Optional[str], bool]: + ... + + +@overload +def get_filepath_or_buffer( + filepath_or_buffer: Union[str, Path], + encoding: Optional[str] = None, + compression: Optional[str] = None, + mode: Optional[str] = None, +) -> Tuple[Union[str, IO], Optional[str], Optional[str], bool]: + ... + + +def get_filepath_or_buffer( + filepath_or_buffer, encoding=None, compression=None, mode=None ): """ If the filepath_or_buffer is a url, translate and return the buffer. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f4b00b0aac5f7..95edc06af39fe 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -458,7 +458,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): finally: parser.close() - if should_close: + if should_close and not isinstance(fp_or_buf, str): try: fp_or_buf.close() except ValueError: From 011494ffea1b67675734942e293b77372380d0e8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 24 Aug 2019 10:48:18 +0100 Subject: [PATCH 2/3] add comments --- pandas/io/common.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/io/common.py b/pandas/io/common.py index 0fc0a978503e5..77f30e793843e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -129,6 +129,11 @@ def _validate_header_arg(header) -> None: ) +# Overload *variants* for '_stringify_path'. +# These variants give extra information to the type checker. +# They are ignored at runtime. + + @overload def _stringify_path(filepath_or_buffer: Union[str, Path]) -> str: ... @@ -139,6 +144,17 @@ def _stringify_path(filepath_or_buffer: IO[AnyStr]) -> IO[AnyStr]: ... +# The actual *implementation* of '_stringify_path'. +# The implementation contains the actual runtime logic. +# +# It may or may not have type hints. If it does, mypy +# will check the body of the implementation against the +# type hints. +# +# Mypy will also check and make sure the signature is +# consistent with the provided variants. + + def _stringify_path(filepath_or_buffer): """Attempt to convert a path-like object to a string. @@ -185,6 +201,11 @@ def is_gcs_url(url) -> bool: return False +# Overload *variants* for 'get_filepath_or_buffer'. +# These variants give extra information to the type checker. +# They are ignored at runtime. + + @overload def get_filepath_or_buffer( filepath_or_buffer: IO[AnyStr], @@ -205,6 +226,17 @@ def get_filepath_or_buffer( ... +# The actual *implementation* of 'get_filepath_or_buffer'. +# The implementation contains the actual runtime logic. +# +# It may or may not have type hints. If it does, mypy +# will check the body of the implementation against the +# type hints. +# +# Mypy will also check and make sure the signature is +# consistent with the provided variants. + + def get_filepath_or_buffer( filepath_or_buffer, encoding=None, compression=None, mode=None ): From d83a7549ed3787971b2a32f85e3e42a7993cbac2 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 27 Aug 2019 16:27:56 +0100 Subject: [PATCH 3/3] revert removal of type hints for actual implementation --- pandas/io/common.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index f113bde6bf2e7..a505f28982aeb 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -168,7 +168,7 @@ def _stringify_path(filepath_or_buffer: IO[AnyStr]) -> IO[AnyStr]: # consistent with the provided variants. -def _stringify_path(filepath_or_buffer): +def _stringify_path(filepath_or_buffer: FilePathOrBuffer): """Attempt to convert a path-like object to a string. Parameters @@ -251,7 +251,10 @@ def get_filepath_or_buffer( def get_filepath_or_buffer( - filepath_or_buffer, encoding=None, compression=None, mode=None + filepath_or_buffer: FilePathOrBuffer, + encoding: Optional[str] = None, + compression: Optional[str] = None, + mode: Optional[str] = None, ): """ If the filepath_or_buffer is a url, translate and return the buffer. @@ -272,10 +275,10 @@ def get_filepath_or_buffer( compression, str, should_close, bool) """ - filepath_or_buffer = _stringify_path(filepath_or_buffer) + fp_or_buf = _stringify_path(filepath_or_buffer) - if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer): - req = urlopen(filepath_or_buffer) + if isinstance(fp_or_buf, str) and _is_url(fp_or_buf): + req = urlopen(fp_or_buf) content_encoding = req.headers.get("Content-Encoding", None) if content_encoding == "gzip": # Override compression based on Content-Encoding header @@ -284,28 +287,28 @@ def get_filepath_or_buffer( req.close() return reader, encoding, compression, True - if is_s3_url(filepath_or_buffer): + if is_s3_url(fp_or_buf): from pandas.io import s3 return s3.get_filepath_or_buffer( - filepath_or_buffer, encoding=encoding, compression=compression, mode=mode + fp_or_buf, encoding=encoding, compression=compression, mode=mode ) - if is_gcs_url(filepath_or_buffer): + if is_gcs_url(fp_or_buf): from pandas.io import gcs return gcs.get_filepath_or_buffer( - filepath_or_buffer, encoding=encoding, compression=compression, mode=mode + fp_or_buf, encoding=encoding, compression=compression, mode=mode ) - if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): - return _expand_user(filepath_or_buffer), None, compression, False + if isinstance(fp_or_buf, (str, bytes, mmap.mmap)): + return _expand_user(fp_or_buf), None, compression, False - if not is_file_like(filepath_or_buffer): + if not is_file_like(fp_or_buf): msg = "Invalid file path or buffer object type: {_type}" - raise ValueError(msg.format(_type=type(filepath_or_buffer))) + raise ValueError(msg.format(_type=type(fp_or_buf))) - return filepath_or_buffer, None, compression, False + return fp_or_buf, None, compression, False def file_path_to_url(path: str) -> str: