Skip to content

Commit fb32477

Browse files
ENH: Improve IOError handling on missing parent directory for Series/DataFrame write methods #24306 (#43436)
1 parent 5f36af3 commit fb32477

File tree

5 files changed

+58
-3
lines changed

5 files changed

+58
-3
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ Other enhancements
107107
- :meth:`DataFrame.to_stata` and :meth:`StataWriter` now accept the keyword only argument ``value_labels`` to save labels for non-categorical columns
108108
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
109109
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
110+
- Attempting to write into a file in a missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now raises an ``OSError`` whose message explicitly mentions the missing parent directory; the same is true for the :class:`Series` counterparts (:issue:`24306`)
110111

111112

112113
.. ---------------------------------------------------------------------------

pandas/io/common.py

+20
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
)
1818
import mmap
1919
import os
20+
from pathlib import Path
2021
import tempfile
2122
from typing import (
2223
IO,
@@ -520,6 +521,21 @@ def infer_compression(
520521
raise ValueError(msg)
521522

522523

524+
def check_parent_directory(path: Path | str) -> None:
525+
"""
526+
Check if parent directory of a file exists, raise OSError if it does not
527+
528+
Parameters
529+
----------
530+
path: Path or str
531+
Path to check parent directory of
532+
533+
"""
534+
parent = Path(path).parent
535+
if not parent.is_dir():
536+
raise OSError(fr"Cannot save file into a non-existent directory: '{parent}'")
537+
538+
523539
def get_handle(
524540
path_or_buf: FilePathOrBuffer,
525541
mode: str,
@@ -632,6 +648,10 @@ def get_handle(
632648
compression_args = dict(ioargs.compression)
633649
compression = compression_args.pop("method")
634650

651+
# Only for write methods
652+
if "r" not in mode and is_path:
653+
check_parent_directory(str(handle))
654+
635655
if compression:
636656
# compression libraries do not like an explicit text-mode
637657
ioargs.mode = ioargs.mode.replace("t", "")

pandas/io/formats/format.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,10 @@
9696
from pandas.core.indexes.timedeltas import TimedeltaIndex
9797
from pandas.core.reshape.concat import concat
9898

99-
from pandas.io.common import stringify_path
99+
from pandas.io.common import (
100+
check_parent_directory,
101+
stringify_path,
102+
)
100103
from pandas.io.formats.printing import (
101104
adjoin,
102105
justify,
@@ -1147,6 +1150,7 @@ def get_buffer(buf: FilePathOrBuffer[str] | None, encoding: str | None = None):
11471150
if hasattr(buf, "write"):
11481151
yield buf
11491152
elif isinstance(buf, str):
1153+
check_parent_directory(str(buf))
11501154
with open(buf, "w", encoding=encoding, newline="") as f:
11511155
# GH#30034 open instead of codecs.open prevents a file leak
11521156
# if we have an invalid encoding argument.

pandas/tests/io/test_common.py

+27
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,33 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext):
227227
):
228228
reader(path)
229229

230+
@pytest.mark.parametrize(
    "method, module, error_class, fn_ext",
    [
        (pd.DataFrame.to_csv, "os", OSError, "csv"),
        (pd.DataFrame.to_html, "os", OSError, "html"),
        # Writing .xlsx needs a writer engine; xlrd is a reader, so guarding
        # on it would not skip the case when no writer is installed.
        (pd.DataFrame.to_excel, "openpyxl", OSError, "xlsx"),
        (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"),
        (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"),
        (pd.DataFrame.to_stata, "os", OSError, "dta"),
        (pd.DataFrame.to_json, "os", OSError, "json"),
        (pd.DataFrame.to_pickle, "os", OSError, "pickle"),
    ],
)
# NOTE: Missing parent directory for pd.DataFrame.to_hdf is handled by PyTables
def test_write_missing_parent_directory(self, method, module, error_class, fn_ext):
    """
    Each writer must raise ``error_class`` naming the missing parent folder.

    ``module`` is the optional dependency the writer needs; the case is
    skipped when it cannot be imported ("os" is used for writers that need
    nothing beyond the standard library).
    """
    pytest.importorskip(module)

    dummy_frame = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]})

    # "missing_folder" is the non-existent parent directory the error
    # message is expected to name.
    path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." + fn_ext)

    with pytest.raises(
        error_class,
        match=r"Cannot save file into a non-existent directory: .*missing_folder",
    ):
        method(dummy_frame, path)
256+
230257
@pytest.mark.parametrize(
231258
"reader, module, error_class, fn_ext",
232259
[

pandas/tests/io/xml/test_to_xml.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -202,10 +202,13 @@ def test_str_output(datapath, parser):
202202

203203

204204
def test_wrong_file_path(parser):
205+
path = "/my/fake/path/output.xml"
206+
205207
with pytest.raises(
206-
FileNotFoundError, match=("No such file or directory|没有那个文件或目录")
208+
OSError,
209+
match=(r"Cannot save file into a non-existent directory: .*path"),
207210
):
208-
geom_df.to_xml("/my/fake/path/output.xml", parser=parser)
211+
geom_df.to_xml(path, parser=parser)
209212

210213

211214
# INDEX

0 commit comments

Comments
 (0)