Skip to content

ENH: Create directories when missing in to_* methods #42255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
42 changes: 42 additions & 0 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ class IOArgs:
compression: CompressionDict
should_close: bool = False

def __post_init__(self):
    # Record on the instance whether the parent directory of the target
    # path exists, as reported by dir_exists().
    parent_found = dir_exists(self.filepath_or_buffer)
    self.parent_exists = parent_found


@dataclasses.dataclass
class IOHandles:
Expand Down Expand Up @@ -630,6 +633,17 @@ def get_handle(
compression_args = dict(ioargs.compression)
compression = compression_args.pop("method")

# If the parent directory doesn't exist initializing the stream will fail (GH 24306)
if (
is_path
and not ioargs.parent_exists
and _is_writable_mode(mode)
):
os.makedirs(
os.path.dirname(ioargs.filepath_or_buffer),
exist_ok=True,
)

if compression:
# compression libraries do not like an explicit text-mode
ioargs.mode = ioargs.mode.replace("t", "")
Expand Down Expand Up @@ -937,6 +951,26 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
return exists


def dir_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
    """
    Test whether the parent directory of a path exists.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Object to inspect. It is passed through ``stringify_path`` first.

    Returns
    -------
    bool
        False when the stringified input is not a ``str``. True when the
        path has no directory component. Otherwise the result of
        ``os.path.exists`` on the directory component, or False if that
        check raises ``TypeError`` or ``ValueError``.
    """
    path = stringify_path(filepath_or_buffer)
    if not isinstance(path, str):
        return False

    parent = os.path.dirname(path)
    if parent == "":
        # No directory component: this is the current working directory
        return True

    try:
        # gh-5874: if the filepath is too long will raise here
        return os.path.exists(parent)
    except (TypeError, ValueError):
        return False


def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
"""Whether the handle is opened in binary mode"""
# specified by user
Expand All @@ -951,3 +985,11 @@ def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
# classes that expect bytes
binary_classes = (BufferedIOBase, RawIOBase)
return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)


def _is_writable_mode(mode: str) -> bool:
"""Whether the handle is opened in writable mode"""
writable_prefixes = ('a', 'w', 'r+')
if any(map(mode.startswith, writable_prefixes)):
return True
return False
9 changes: 9 additions & 0 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,3 +533,12 @@ def test_errno_attribute():
with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err:
pd.read_csv("doesnt_exist")
assert err.errno == errno.ENOENT


def test_create_missing_dirs():
    """Check that ``to_csv`` writes to a path whose parent directories are missing."""
    # GH 24306
    frame = tm.makeDataFrame()
    with tm.ensure_clean_dir() as tmp_dir:
        # None of the intermediate directories exist before the write.
        target = os.path.join(tmp_dir, 'nonexistent/path/to/file.csv')
        frame.to_csv(target)
        assert os.path.exists(target)
2 changes: 0 additions & 2 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,8 +737,6 @@ def test_expand_user(self, df_compat, monkeypatch):
monkeypatch.setenv("USERPROFILE", "TestingUser")
with pytest.raises(OSError, match=r".*TestingUser.*"):
read_parquet("~/file.parquet")
with pytest.raises(OSError, match=r".*TestingUser.*"):
df_compat.to_parquet("~/file.parquet")

def test_partition_cols_supported(self, pa, df_full):
# GH #23283
Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/io/xml/test_to_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,6 @@ def test_str_output(datapath, parser):
assert output == from_file_expected


def test_wrong_file_path(parser):
    """Check that ``to_xml`` raises FileNotFoundError for a path in a missing directory."""
    # Accept either the English or the Chinese wording of the error message.
    expected_msg = "No such file or directory|没有那个文件或目录"
    bad_path = "/my/fake/path/output.xml"
    with pytest.raises(FileNotFoundError, match=expected_msg):
        geom_df.to_xml(bad_path, parser=parser)


# INDEX


Expand Down