diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 7b9997e8f0bd6..328499a4ae98e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -107,6 +107,7 @@ Other enhancements - :meth:`DataFrame.to_stata` and :meth:`StataWriter` now accept the keyword only argument ``value_labels`` to save labels for non-categorical columns - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`) - Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`) +- Attempting to write into a file in a missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now raises an ``OSError`` whose message explicitly mentions the missing parent directory; the same is true for the :class:`Series` counterparts (:issue:`24306`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index 5c5b9c65b8abd..46be1f9bb09b2 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -17,6 +17,7 @@ ) import mmap import os +from pathlib import Path import tempfile from typing import ( IO, @@ -520,6 +521,21 @@ def infer_compression( raise ValueError(msg) +def check_parent_directory(path: Path | str) -> None: + """ + Check if parent directory of a file exists, raise OSError if it does not + + Parameters + ---------- + path: Path or str + Path to check parent directory of + + """ + parent = Path(path).parent + if not parent.is_dir(): + raise OSError(fr"Cannot save file into a non-existent directory: '{parent}'") + + def get_handle( path_or_buf: FilePathOrBuffer, mode: str, @@ -632,6 +648,10 @@ def get_handle( compression_args = dict(ioargs.compression) compression = compression_args.pop("method") + # Only for write methods + if "r" not in mode and is_path: + check_parent_directory(str(handle)) + if compression: # compression libraries do not like an explicit text-mode ioargs.mode = ioargs.mode.replace("t", "") diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3fd3d84f90161..d636838d21d0e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -96,7 +96,10 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.reshape.concat import concat -from pandas.io.common import stringify_path +from pandas.io.common import ( + check_parent_directory, + stringify_path, +) from pandas.io.formats.printing import ( adjoin, justify, @@ -1147,6 +1150,7 @@ def get_buffer(buf: FilePathOrBuffer[str] | None, encoding: str | None = None): if hasattr(buf, "write"): yield buf elif isinstance(buf, str): + check_parent_directory(str(buf)) with open(buf, "w", encoding=encoding, newline="") as f: # GH#30034 open instead of codecs.open prevents a file leak # if we have an invalid 
encoding argument. diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index fc834c7acf39f..ad0b25d26d6f6 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -227,6 +227,33 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext): ): reader(path) + @pytest.mark.parametrize( + "method, module, error_class, fn_ext", + [ + (pd.DataFrame.to_csv, "os", OSError, "csv"), + (pd.DataFrame.to_html, "os", OSError, "html"), + (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"), + (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"), + (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"), + (pd.DataFrame.to_stata, "os", OSError, "dta"), + (pd.DataFrame.to_json, "os", OSError, "json"), + (pd.DataFrame.to_pickle, "os", OSError, "pickle"), + ], + ) + # NOTE: Missing parent directory for pd.DataFrame.to_hdf is handled by PyTables + def test_write_missing_parent_directory(self, method, module, error_class, fn_ext): + pytest.importorskip(module) + + dummy_frame = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]}) + + path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." 
+ fn_ext) + + with pytest.raises( + error_class, + match=r"Cannot save file into a non-existent directory: .*missing_folder", + ): + method(dummy_frame, path) + @pytest.mark.parametrize( "reader, module, error_class, fn_ext", [ diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 4f4815b9008ad..b8d146c597d2c 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -202,10 +202,13 @@ def test_str_output(datapath, parser): def test_wrong_file_path(parser): + path = "/my/fake/path/output.xml" + with pytest.raises( - FileNotFoundError, match=("No such file or directory|没有那个文件或目录") + OSError, + match=(r"Cannot save file into a non-existent directory: .*path"), ): - geom_df.to_xml("/my/fake/path/output.xml", parser=parser) + geom_df.to_xml(path, parser=parser) # INDEX