diff --git a/pandas/io/common.py b/pandas/io/common.py
index 06b00a9cbb4eb..bc198cb14ac9a 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -72,6 +72,11 @@ class IOArgs:
     compression: CompressionDict
     should_close: bool = False
 
+    def __post_init__(self) -> None:
+        # Cache whether the target's parent directory exists so that
+        # get_handle can create it before opening a file for writing.
+        self.parent_exists = dir_exists(self.filepath_or_buffer)
+
 
 @dataclasses.dataclass
 class IOHandles:
@@ -630,6 +635,11 @@ def get_handle(
     compression_args = dict(ioargs.compression)
     compression = compression_args.pop("method")
 
+    # Opening a path for writing fails when its parent directory is missing,
+    # so create the directory tree first (GH 24306)
+    if is_path and not ioargs.parent_exists and _is_writable_mode(mode):
+        os.makedirs(os.path.dirname(ioargs.filepath_or_buffer), exist_ok=True)
+
     if compression:
         # compression libraries do not like an explicit text-mode
         ioargs.mode = ioargs.mode.replace("t", "")
@@ -937,6 +947,38 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
     return exists
 
 
+def dir_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
+    """
+    Test whether the parent directory of a path exists.
+
+    Parameters
+    ----------
+    filepath_or_buffer : FilePathOrBuffer
+        Target to inspect; it is passed through ``stringify_path`` first.
+
+    Returns
+    -------
+    bool
+        True if the parent directory exists or the path has no directory
+        component (i.e. it is relative to the current working directory).
+        False if the parent directory is missing or the input is not a path.
+    """
+    filepath_or_buffer = stringify_path(filepath_or_buffer)
+    if not isinstance(filepath_or_buffer, str):
+        return False
+
+    dirname = os.path.dirname(filepath_or_buffer)
+    if not dirname:
+        # No directory component: the file lives in the current working directory
+        return True
+
+    try:
+        return os.path.exists(dirname)
+    except (TypeError, ValueError):
+        # gh-5874: if the filepath is too long will raise here
+        return False
+
+
 def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
     """Whether the handle is opened in binary mode"""
     # specified by user
@@ -951,3 +993,23 @@ def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
     # classes that expect bytes
     binary_classes = (BufferedIOBase, RawIOBase)
     return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)
+
+
+def _is_writable_mode(mode: str) -> bool:
+    """
+    Whether the handle is opened in a writable mode.
+
+    Parameters
+    ----------
+    mode : str
+        Mode string as accepted by the builtin ``open``, e.g. ``"w"`` or ``"rb+"``.
+
+    Returns
+    -------
+    bool
+        True if the mode allows writing, i.e. it contains ``"w"``, ``"a"``,
+        ``"x"`` or ``"+"``.
+    """
+    # "+" may follow the binary/text flag ("rb+"), so test for membership
+    # rather than for a fixed prefix
+    return any(flag in mode for flag in "wax+")
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index d52ea01ac35de..31aa0beb6618e 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -533,3 +533,12 @@ def test_errno_attribute():
     with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err:
         pd.read_csv("doesnt_exist")
         assert err.errno == errno.ENOENT
+
+
+def test_create_missing_dirs():
+    # GH 24306
+    df = tm.makeDataFrame()
+    with tm.ensure_clean_dir() as fp:
+        full_path = os.path.join(fp, "nonexistent", "path", "to", "file.csv")
+        df.to_csv(full_path)
+        assert os.path.exists(full_path)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index d100c584b698a..ec1c986badf19 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -737,8 +737,6 @@ def test_expand_user(self, df_compat, monkeypatch):
         monkeypatch.setenv("USERPROFILE", "TestingUser")
         with pytest.raises(OSError, match=r".*TestingUser.*"):
             read_parquet("~/file.parquet")
-        with pytest.raises(OSError, match=r".*TestingUser.*"):
-            df_compat.to_parquet("~/file.parquet")
 
     def test_partition_cols_supported(self, pa, df_full):
         # GH #23283
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 1e2973075f98e..a6b4c118b9142 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -199,13 +199,6 @@ def test_str_output(datapath, parser):
     assert output == from_file_expected
 
 
-def test_wrong_file_path(parser):
-    with pytest.raises(
-        FileNotFoundError, match=("No such file or directory|没有那个文件或目录")
-    ):
-        geom_df.to_xml("/my/fake/path/output.xml", parser=parser)
-
-
 # INDEX
 
 