From 130cd886804f5f0be99fb6faf23742f921f44f29 Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 12:16:05 -0400 Subject: [PATCH 1/8] Adds ability to recursively create missing directories when writing files pandas-dev#24306 --- pandas/io/common.py | 36 ++++++++++++++++++++++++++++++++++ pandas/tests/io/test_common.py | 13 ++++++++++++ 2 files changed, 49 insertions(+) diff --git a/pandas/io/common.py b/pandas/io/common.py index 06b00a9cbb4eb..5c736d9143ef3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -72,6 +72,9 @@ class IOArgs: compression: CompressionDict should_close: bool = False + def __post_init__(self): + self.parent_exists = dir_exists(self.filepath_or_buffer) + @dataclasses.dataclass class IOHandles: @@ -630,6 +633,17 @@ def get_handle( compression_args = dict(ioargs.compression) compression = compression_args.pop("method") + # If the parent directory doesn't exist initializing the stream will fail (GH 24306) + if ( + _is_writable_mode(mode) + and is_path + and not ioargs.parent_exists + ): + os.makedirs( + os.path.dirname(ioargs.filepath_or_buffer), + exist_ok=True, + ) + if compression: # compression libraries do not like an explicit text-mode ioargs.mode = ioargs.mode.replace("t", "") @@ -937,6 +951,20 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool: return exists +def dir_exists(filepath_or_buffer: FilePathOrBuffer) -> bool: + """Test whether parent directory exists.""" + exists = False + filepath_or_buffer = stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, str): + return exists + try: + exists = os.path.exists(os.path.dirname(filepath_or_buffer)) + # gh-5874: if the filepath is too long will raise here + except (TypeError, ValueError): + pass + return exists + + def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool: """Whether the handle is opened in binary mode""" # specified by user @@ -951,3 +979,11 @@ def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool: # classes that expect bytes binary_classes = (BufferedIOBase, RawIOBase) return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode) + + +def _is_writable_mode(mode: str) -> bool: + """Whether the handle is opened in writable mode""" + writable_prefixes = ('a', 'w', 'r+') + if any(map(mode.startswith, writable_prefixes)): + return True + return False diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index d52ea01ac35de..1b0177d7b351f 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -533,3 +533,16 @@ def test_errno_attribute(): with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err: pd.read_csv("doesnt_exist") assert err.errno == errno.ENOENT + + +def test_create_missing_dirs(): + # GH 24306 + df = tm.makeDataFrame() + filepath = 'nonexistent/path/to/file.csv' + df.to_csv(filepath) + assert os.path.exists(filepath) + # Cleanup after test: + os.remove(filepath) + components = filepath.split('/') + for i in reversed(range(1, len(components))): + os.rmdir(os.path.join(*components[:i])) From ef8196b93e2f7e24d7483d684172720d0b5fc5f9 Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 12:22:59 -0400 Subject: [PATCH 2/8] Fixes indent --- pandas/io/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 5c736d9143ef3..ed8659f5c70dc 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -635,9 +635,9 @@ def get_handle( # If the parent directory doesn't exist initializing the stream will fail (GH 24306) if ( - _is_writable_mode(mode) - and is_path - and not ioargs.parent_exists + _is_writable_mode(mode) + and is_path + and not ioargs.parent_exists ): os.makedirs( os.path.dirname(ioargs.filepath_or_buffer), From c8a9f0dcdf3ee31c3fe71f52ff9efb7873c36994 Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 13:17:34 -0400 Subject: [PATCH 3/8] Fixes failing test, directory exists if dirname is '' --- pandas/io/common.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index ed8659f5c70dc..fca400871389b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -957,11 +957,17 @@ def dir_exists(filepath_or_buffer: FilePathOrBuffer) -> bool: filepath_or_buffer = stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): return exists - try: - exists = os.path.exists(os.path.dirname(filepath_or_buffer)) - # gh-5874: if the filepath is too long will raise here - except (TypeError, ValueError): - pass + + dirname = os.path.dirname(filepath_or_buffer) + if not len(dirname): + # This is the current working directory + exists = True + else: + try: + exists = os.path.exists(dirname) + # gh-5874: if the filepath is too long will raise here + except (TypeError, ValueError): + pass return exists From 59d3f973cc7940996606c153907151e68993931c Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 13:29:59 -0400 Subject: [PATCH 4/8] Enhance test to use _testing context manager (re:flake8) --- pandas/tests/io/test_common.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 1b0177d7b351f..5053b189502b2 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -538,11 +538,7 @@ def test_errno_attribute(): def test_create_missing_dirs(): # GH 24306 df = tm.makeDataFrame() - filepath = 'nonexistent/path/to/file.csv' - df.to_csv(filepath) - assert os.path.exists(filepath) - # Cleanup after test: - os.remove(filepath) - components = filepath.split('/') - for i in reversed(range(1, len(components))): - os.rmdir(os.path.join(*components[:i])) + with tm.ensure_clean() as fp: + full_path = os.path.join(fp, '/nonexistent/path/to/file.csv') + df.to_csv(full_path) + assert os.path.exists(full_path) From 2554d128be1f0085001fba68db2c657629df48df Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 14:11:18 -0400 Subject: [PATCH 5/8] Removes wrong file path test in test_to_xml; intentionally fixed by this PR --- pandas/tests/io/xml/test_to_xml.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 1e2973075f98e..a6b4c118b9142 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -199,13 +199,6 @@ def test_str_output(datapath, parser): assert output == from_file_expected -def test_wrong_file_path(parser): - with pytest.raises( - FileNotFoundError, match=("No such file or directory|没有那个文件或目录") - ): - geom_df.to_xml("/my/fake/path/output.xml", parser=parser) - - # INDEX From 6464652df44237c898ae482389959a87ee3205a6 Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 14:15:14 -0400 Subject: [PATCH 6/8] Removes similar example in test_parquet; intentionally fixed by this PR --- pandas/tests/io/test_parquet.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index d100c584b698a..ec1c986badf19 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -737,8 +737,6 @@ def test_expand_user(self, df_compat, monkeypatch): monkeypatch.setenv("USERPROFILE", "TestingUser") with pytest.raises(OSError, match=r".*TestingUser.*"): read_parquet("~/file.parquet") - with pytest.raises(OSError, match=r".*TestingUser.*"): - df_compat.to_parquet("~/file.parquet") def test_partition_cols_supported(self, pa, df_full): # GH #23283 From fed929f8c3bacc80d43b09a1fc1bc28c9dd3a9cc Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 15:15:15 -0400 Subject: [PATCH 7/8] Updates new test to use correct context manager --- pandas/tests/io/test_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 5053b189502b2..31aa0beb6618e 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -538,7 +538,7 @@ def test_errno_attribute(): def test_create_missing_dirs(): # GH 24306 df = tm.makeDataFrame() - with tm.ensure_clean() as fp: - full_path = os.path.join(fp, '/nonexistent/path/to/file.csv') + with tm.ensure_clean_dir() as fp: + full_path = os.path.join(fp, 'nonexistent/path/to/file.csv') df.to_csv(full_path) assert os.path.exists(full_path) From e8450017517a9a12b7962c31e68ce573b236be0b Mon Sep 17 00:00:00 2001 From: Rob Kimball Date: Sat, 26 Jun 2021 16:34:41 -0400 Subject: [PATCH 8/8] Reorder if conditions to short-circuit earlier --- pandas/io/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index fca400871389b..bc198cb14ac9a 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -635,9 +635,9 @@ def get_handle( # If the parent directory doesn't exist initializing the stream will fail (GH 24306) if ( - _is_writable_mode(mode) - and is_path + is_path and not ioargs.parent_exists + and _is_writable_mode(mode) ): os.makedirs( os.path.dirname(ioargs.filepath_or_buffer),