Skip to content

Commit 2a98498

Browse files
twoertwein authored and luckyvs1 committed
BUG: do not stringify file-like objects (pandas-dev#38141)
1 parent 9661766 commit 2a98498

File tree

4 files changed

+22
-10
lines changed

4 files changed

+22
-10
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,7 @@ I/O
746746
- :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`)
747747
- :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`)
748748
- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`)
749+
- :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`)
749750

750751
Period
751752
^^^^^^

pandas/io/common.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ def validate_header_arg(header) -> None:
152152

153153
def stringify_path(
154154
filepath_or_buffer: FilePathOrBuffer[AnyStr],
155+
convert_file_like: bool = False,
155156
) -> FileOrBuffer[AnyStr]:
156157
"""
157158
Attempt to convert a path-like object to a string.
@@ -169,12 +170,15 @@ def stringify_path(
169170
Objects supporting the fspath protocol (python 3.6+) are coerced
170171
according to their __fspath__ method.
171172
172-
For backwards compatibility with older pythons, pathlib.Path and
173-
py.path objects are specially coerced.
174-
175173
Any other object is passed through unchanged, which includes bytes,
176174
strings, buffers, or anything else that's not even path-like.
177175
"""
176+
if not convert_file_like and is_file_like(filepath_or_buffer):
177+
# GH 38125: some fsspec objects implement os.PathLike but have already opened a
178+
# file. This prevents opening the file a second time. infer_compression calls
179+
# this function with convert_file_like=True to infer the compression.
180+
return cast(FileOrBuffer[AnyStr], filepath_or_buffer)
181+
178182
if isinstance(filepath_or_buffer, os.PathLike):
179183
filepath_or_buffer = filepath_or_buffer.__fspath__()
180184
return _expand_user(filepath_or_buffer)
@@ -462,7 +466,7 @@ def infer_compression(
462466
# Infer compression
463467
if compression == "infer":
464468
# Convert all path types (e.g. pathlib.Path) to strings
465-
filepath_or_buffer = stringify_path(filepath_or_buffer)
469+
filepath_or_buffer = stringify_path(filepath_or_buffer, convert_file_like=True)
466470
if not isinstance(filepath_or_buffer, str):
467471
# Cannot infer compression of a buffer, assume no compression
468472
return None

pandas/io/parsers.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
from pandas.core.series import Series
7575
from pandas.core.tools import datetimes as tools
7676

77-
from pandas.io.common import IOHandles, get_handle, stringify_path, validate_header_arg
77+
from pandas.io.common import IOHandles, get_handle, validate_header_arg
7878
from pandas.io.date_converters import generic_parser
7979

8080
# BOM character (byte order mark)
@@ -774,7 +774,7 @@ class TextFileReader(abc.Iterator):
774774

775775
def __init__(self, f, engine=None, **kwds):
776776

777-
self.f = stringify_path(f)
777+
self.f = f
778778

779779
if engine is not None:
780780
engine_specified = True
@@ -859,14 +859,14 @@ def _get_options_with_defaults(self, engine):
859859

860860
def _check_file_or_buffer(self, f, engine):
861861
# see gh-16530
862-
if is_file_like(f):
862+
if is_file_like(f) and engine != "c" and not hasattr(f, "__next__"):
863863
# The C engine doesn't need the file-like to have the "__next__"
864864
# attribute. However, the Python engine explicitly calls
865865
# "__next__(...)" when iterating through such an object, meaning it
866866
# needs to have that attribute
867-
if engine != "c" and not hasattr(f, "__next__"):
868-
msg = "The 'python' engine cannot iterate through this file buffer."
869-
raise ValueError(msg)
867+
raise ValueError(
868+
"The 'python' engine cannot iterate through this file buffer."
869+
)
870870

871871
def _clean_options(self, options, engine):
872872
result = options.copy()

pandas/tests/io/test_common.py

+7
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@ def test_stringify_path_fspath(self):
8585
result = icom.stringify_path(p)
8686
assert result == "foo/bar.csv"
8787

88+
def test_stringify_file_and_path_like(self):
89+
# GH 38125: do not stringify file objects that are also path-like
90+
fsspec = pytest.importorskip("fsspec")
91+
with tm.ensure_clean() as path:
92+
with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
93+
assert fsspec_obj == icom.stringify_path(fsspec_obj)
94+
8895
@pytest.mark.parametrize(
8996
"extension,expected",
9097
[

0 commit comments

Comments
 (0)