From b47ec1e4e4e855391d4205ae1a0ebd7cb782f3c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 28 Nov 2020 15:20:14 -0500 Subject: [PATCH] BUG: do not stringify file-like objects --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/common.py | 12 ++++++++---- pandas/io/parsers.py | 12 ++++++------ pandas/tests/io/test_common.py | 7 +++++++ 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bc7f5b8174573..4906288cc07d9 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -746,6 +746,7 @@ I/O - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other :meth:``read_*`` functions (:issue:`37909`) - :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`) - Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`) +- :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`) Period ^^^^^^ diff --git a/pandas/io/common.py b/pandas/io/common.py index 9fede5180e727..64c5d3173fe0a 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -152,6 +152,7 @@ def validate_header_arg(header) -> None: def stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr], + convert_file_like: bool = False, ) -> FileOrBuffer[AnyStr]: """ Attempt to convert a path-like object to a string. @@ -169,12 +170,15 @@ def stringify_path( Objects supporting the fspath protocol (python 3.6+) are coerced according to its __fspath__ method. - For backwards compatibility with older pythons, pathlib.Path and - py.path objects are specially coerced. - Any other object is passed through unchanged, which includes bytes, strings, buffers, or anything else that's not even path-like. """ + if not convert_file_like and is_file_like(filepath_or_buffer): + # GH 38125: some fsspec objects implement os.PathLike but have already opened a + # file. This prevents opening the file a second time. infer_compression calls + # this function with convert_file_like=True to infer the compression. + return cast(FileOrBuffer[AnyStr], filepath_or_buffer) + if isinstance(filepath_or_buffer, os.PathLike): filepath_or_buffer = filepath_or_buffer.__fspath__() return _expand_user(filepath_or_buffer) @@ -462,7 +466,7 @@ def infer_compression( # Infer compression if compression == "infer": # Convert all path types (e.g. pathlib.Path) to strings - filepath_or_buffer = stringify_path(filepath_or_buffer) + filepath_or_buffer = stringify_path(filepath_or_buffer, convert_file_like=True) if not isinstance(filepath_or_buffer, str): # Cannot infer compression of a buffer, assume no compression return None diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3244b1c0f65b2..2e656edeee74a 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -74,7 +74,7 @@ from pandas.core.series import Series from pandas.core.tools import datetimes as tools -from pandas.io.common import IOHandles, get_handle, stringify_path, validate_header_arg +from pandas.io.common import IOHandles, get_handle, validate_header_arg from pandas.io.date_converters import generic_parser # BOM character (byte order mark) @@ -774,7 +774,7 @@ class TextFileReader(abc.Iterator): def __init__(self, f, engine=None, **kwds): - self.f = stringify_path(f) + self.f = f if engine is not None: engine_specified = True @@ -859,14 +859,14 @@ def _get_options_with_defaults(self, engine): def _check_file_or_buffer(self, f, engine): # see gh-16530 - if is_file_like(f): + if is_file_like(f) and engine != "c" and not hasattr(f, "__next__"): # The C engine doesn't need the file-like to have the "__next__" # attribute. However, the Python engine explicitly calls # "__next__(...)" when iterating through such an object, meaning it # needs to have that attribute - if engine != "c" and not hasattr(f, "__next__"): - msg = "The 'python' engine cannot iterate through this file buffer." - raise ValueError(msg) + raise ValueError( + "The 'python' engine cannot iterate through this file buffer." + ) def _clean_options(self, options, engine): result = options.copy() diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index c3b21daa0ac04..34cb00e89ea0c 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -85,6 +85,13 @@ def test_stringify_path_fspath(self): result = icom.stringify_path(p) assert result == "foo/bar.csv" + def test_stringify_file_and_path_like(self): + # GH 38125: do not stringify file objects that are also path-like + fsspec = pytest.importorskip("fsspec") + with tm.ensure_clean() as path: + with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj: + assert fsspec_obj == icom.stringify_path(fsspec_obj) + @pytest.mark.parametrize( "extension,expected", [